diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,298239 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.012471056543290615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.012051264755427837, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.007686486933380365, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.007581963669508696, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.007490440737456083, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.006149796303361654, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.023484498262405396, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.011819328181445599, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.007552936673164368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.007426844909787178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.007593472953885794, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.00775101175531745, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.007397458888590336, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.006362225394695997, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.006093429867178202, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.006424714811146259, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006073737516999245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0059728058986365795, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006070122122764587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005968200974166393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006075363606214523, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006068906746804714, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005947618279606104, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.005965524353086948, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.012471056543290615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.012471056543290615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.010656741447746754, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.010100910440087318, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.00512732332572341, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.005007470492273569, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0049019185826182365, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.002375648356974125, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.012893361039459705, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.009867478162050247, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.005020999349653721, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.004815116059035063, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.005012182518839836, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.005292235407978296, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.004784092307090759, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.002946748398244381, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0022767374757677317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0029513330664485693, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.002229205099865794, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00198329403065145, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.002221381990239024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.001969760050997138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0020278985612094402, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00221849512308836, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0016469318652525544, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0019630882889032364, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.010656741447746754, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.010656741447746754, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05376621335744858, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.04409528523683548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03641258552670479, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03015279211103916, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.023219943046569824, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.017389558255672455, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03593381866812706, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03191637247800827, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.024949936196208, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.018745824694633484, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.018389008939266205, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018361922353506088, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015370342880487442, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011464247480034828, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.010336757637560368, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00919465534389019, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006441495381295681, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006004457361996174, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005656999070197344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005034692119807005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004809239413589239, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00512019032612443, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003281002165749669, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003881534095853567, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011464247480034828, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011464247480034828, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.01228232029825449, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.00845341570675373, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.006195549387484789, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.005820611957460642, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.004847732838243246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0032650793436914682, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.007728861179202795, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.006988101173192263, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.005617803428322077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.003918997477740049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.003947100136429071, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.003937163390219212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0035918131470680237, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.002770101185888052, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.002539015142247081, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0021950951777398586, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0020555444061756134, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0019903143402189016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.001956793013960123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0018569082021713257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0015271931188181043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0019256735686212778, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0013423145283013582, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0017823305679485202, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.01228232029825449, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.01228232029825449, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.05616278201341629, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.052899181842803955, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.05193743482232094, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.04727388545870781, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.024114729836583138, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.023266011849045753, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.02658873051404953, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.02469915896654129, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.024362392723560333, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.022292589768767357, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.021429724991321564, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.013412928208708763, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.011932776309549809, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.011690263636410236, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.011634239926934242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.006805737968534231, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.006577933672815561, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.006559842266142368, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.006277306936681271, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.006247797980904579, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0039616660214960575, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.004704238846898079, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0038876605685800314, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.003926567267626524, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.013412928208708763, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.013412928208708763, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.06381543725728989, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.060167908668518066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.05906239151954651, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.05375941842794418, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.02732917293906212, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.026363031938672066, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.030118461698293686, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.028001876547932625, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.027630284428596497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.02522832341492176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.02418624982237816, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.01501238439232111, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.013152224011719227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.012874512001872063, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.012812966480851173, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.007477704901248217, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.0066739474423229694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.00665375217795372, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0062822639010846615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.006243837997317314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.003951584920287132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.004114414099603891, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0038558482192456722, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.002860600594431162, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.013152224011719227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.013152224011719227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.03316492959856987, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.026759199798107147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.021532978862524033, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.01905030757188797, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.01343783549964428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.01003296673297882, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.021308965981006622, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.01934020221233368, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.014760488644242287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.01167087908834219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.011475736275315285, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.010756301693618298, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.009517757222056389, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.007053903769701719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.006346483249217272, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0056069414131343365, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.004626707173883915, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.004371856804937124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.004389077425003052, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.004054791294038296, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0034508435055613518, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.00412770127877593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.0028084763325750828, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.0035913879983127117, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.014760488644242287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.014760488644242287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013047050684690475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.00993376225233078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.005722797475755215, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.005459659267216921, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.005177420098334551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.002492641331627965, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.011250111274421215, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.00923894066363573, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.005929586477577686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0044889627024531364, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.004870050121098757, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.005389494821429253, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.004380328580737114, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0026785607915371656, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.0020980704575777054, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0027226642705500126, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0016769206849858165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00145215995144099, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0015772904735058546, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.001288228901103139, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0015585137298330665, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0015629929257556796, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0010414528660476208, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0012266585836187005, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013047050684690475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013047050684690475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.014103073626756668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.010506678372621536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.00622952077537775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.005826625972986221, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.005427885800600052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0026801524218171835, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.011518456041812897, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.009589053690433502, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.006255818530917168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.004606288857758045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.004928367678076029, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0053178612142801285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.004462190438061953, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.00273520196788013, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.002147440565750003, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.002677708165720105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0016189563320949674, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.001384072587825358, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0014910983154550195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.00117058539763093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0015147788217291236, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0014706511283293366, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0010000455658882856, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0010811393149197102, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.014103073626756668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.014103073626756668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07160818576812744, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05601515248417854, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04834775626659393, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04028010740876198, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.030441150069236755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.023333821445703506, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.04141269996762276, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03783890977501869, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03290469944477081, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02322760783135891, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022255655378103256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.021242808550596237, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.018198924139142036, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01468578353524208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013761121779680252, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010628834366798401, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007760542444884777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007403157185763121, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0063950796611607075, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005761805456131697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005546627100557089, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005511472932994366, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004240730777382851, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003692910773679614, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01468578353524208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01468578353524208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08612267673015594, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05995282903313637, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.047666117548942566, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.038183897733688354, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03493548557162285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.024975361302495003, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.050622906535863876, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04549291357398033, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03973470628261566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.024191301316022873, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024947473779320717, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02596927620470524, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.022529236972332, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.018045686185359955, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016831040382385254, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013616835698485374, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.011373483575880527, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.011034691706299782, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009882183745503426, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00920002069324255, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.008435986936092377, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.009688393212854862, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.007168995216488838, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008377556689083576, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013616835698485374, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013616835698485374, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.07211846858263016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.06851045787334442, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.06745121628046036, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.06162514537572861, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.03063126467168331, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.029741700738668442, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.03360913321375847, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.031157994642853737, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.03090677596628666, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.028519971296191216, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.02745867520570755, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.016859369352459908, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.014802991412580013, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.014581521973013878, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.014538642950356007, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.00845709815621376, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.007891107350587845, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.007882840931415558, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0075281886383891106, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.00750188110396266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.004720555618405342, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.005298845004290342, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.004648321308195591, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.004170505329966545, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.014802991412580013, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.014802991412580013, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.0898798331618309, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.08556007593870163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.08435369282960892, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.07728313654661179, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.038309093564748764, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.037232574075460434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.04197016730904579, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.03893187642097473, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.038636356592178345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.03573152422904968, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.03434086963534355, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.02087327279150486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.01820887066423893, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.017950423061847687, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.017890965566039085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01037703175097704, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.00917146261781454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.009154938161373138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.008684751577675343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.00864829309284687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.005416906904429197, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.005446423310786486, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.005327419377863407, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.0035746272187680006, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01037703175097704, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01037703175097704, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.0710751861333847, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.06447473168373108, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.061293330043554306, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.05688999965786934, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0309014692902565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.02833724394440651, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.03763805702328682, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.03442694619297981, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.03191661089658737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.02810818701982498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.02766948938369751, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.019024018198251724, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.016525043174624443, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.014988288283348083, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.014602463692426682, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.009622441604733467, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.00827145203948021, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.008121099323034286, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.007775231264531612, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.007554999087005854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.005462000146508217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.0059112198650836945, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.005001117940992117, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.004638887010514736, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.014602463692426682, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.014602463692426682, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013675298541784286, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.011601232923567295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.009338953532278538, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.007944769226014614, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.005871581844985485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.004376733209937811, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.00943849515169859, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.008474526926875114, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.006185886450111866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.004907811991870403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.004801874049007893, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.004562413319945335, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.003993879538029432, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0028998677153140306, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.0025832359679043293, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0023092804476618767, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0016559790819883347, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0015077190473675728, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0014936468796804547, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0013059420743957162, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0012862758012488484, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0013522962108254433, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.000961453013587743, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0010138984071090817, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013675298541784286, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.013675298541784286, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.012539597228169441, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.010813837870955467, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.00846678763628006, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.007232126779854298, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.005350364372134209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.003898764494806528, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.009078833274543285, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.008120290003716946, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.005603362340480089, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.004558555316179991, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.004482103046029806, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.004271794576197863, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0037861145101487637, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.002666095970198512, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0023332831915467978, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0021956171840429306, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0015526883071288466, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.001392991398461163, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0014236822025850415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0012333010090515018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0012940356973558664, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0013074029702693224, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0010002842172980309, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0009936470305547118, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.012539597228169441, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.012539597228169441, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.10247095674276352, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0910654291510582, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0868133157491684, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.07482240349054337, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.04581030458211899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.04155968874692917, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.054569635540246964, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.04971783608198166, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.04721520468592644, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03876496106386185, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.03605056181550026, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.027787452563643456, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.023640934377908707, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0219112541526556, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.021472416818141937, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01390275452286005, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.011243900284171104, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.011088576167821884, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00982701312750578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.009548092260956764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.007174036931246519, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00697458116337657, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.006385681685060263, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.004456827417016029, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01390275452286005, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01390275452286005, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1179768294095993, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09086314588785172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0806913673877716, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06692248582839966, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.050543639808893204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03999705612659454, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0646151751279831, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.05854977294802666, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05432514101266861, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03781852498650551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.035238176584243774, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03307555243372917, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.028668837621808052, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.02513466402888298, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.024242602288722992, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.017084665596485138, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014557809568941593, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014308194629848003, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012475320138037205, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01193222962319851, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.010234280489385128, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.011187640950083733, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00920892134308815, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009276250377297401, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014557809568941593, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014557809568941593, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1026967242360115, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.09785637259483337, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.0964808538556099, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.08874554187059402, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.04471715912222862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.043455351144075394, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.049164045602083206, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.04543642699718475, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.04508650675415993, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.04174279794096947, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.04042758792638779, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.024792779237031937, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.021732715889811516, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.021434320136904716, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.02137080952525139, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.012474271468818188, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.011680744588375092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.011663420125842094, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01116638258099556, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.011130486615002155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.007048482540994883, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.00793095026165247, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.006955089047551155, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00632232753559947, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.012474271468818188, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.012474271468818188, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.12377912551164627, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.11805237829685211, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.11642225086688995, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.10713029652833939, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.05393946170806885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0524163544178009, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0592748299241066, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.05482352152466774, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.05440114438533783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.050337180495262146, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0486459955573082, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.029636874794960022, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.025775959715247154, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.025410449132323265, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.02532780170440674, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014760412275791168, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.013059197925031185, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.013034485280513763, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.012375137768685818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.012326560914516449, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.007807925343513489, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.007857074961066246, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.007683871313929558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.005290089175105095, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014760412275791168, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014760412275791168, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.046616021543741226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.044510368257761, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.026710258796811104, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.02516918070614338, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.02094932273030281, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.010768766514956951, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.04888814687728882, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.04026051610708237, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.021193984895944595, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.020091207697987556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.021731408312916756, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.02136601135134697, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.018647601827979088, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.010491942055523396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.007129949051886797, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.009843709878623486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.005701000802218914, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0031588603742420673, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.005564512684941292, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.002881526481360197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0053968592546880245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.005305126309394836, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.0023249394726008177, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.0021165781654417515, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.010768766514956951, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.010768766514956951, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.022158434614539146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.019884923473000526, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.018506845459342003, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.01623566448688507, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.009887222200632095, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.00880009401589632, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.013203554786741734, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.01143423467874527, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.010162338614463806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.008552133105695248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.008110091090202332, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.006665718741714954, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0054580471478402615, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.004781355150043964, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.004610792733728886, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.003364552976563573, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0025749451015144587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.002495464403182268, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.002327614463865757, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.002220957074314356, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0018262416124343872, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0017984028672799468, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0015320759266614914, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0013172461185604334, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.013203554786741734, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.013203554786741734, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.023482242599129677, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.020926829427480698, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.019447481259703636, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.016967235133051872, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.010310830548405647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.009129137732088566, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.012959633022546768, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.011950620450079441, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.010631230659782887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.008866377174854279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.008360431529581547, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.006490944419056177, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.005669445265084505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.004943944048136473, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.004761546850204468, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.003258815035223961, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.002591729396954179, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.002506399992853403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0023131154011934996, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0021955149713903666, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0017245698254555464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0017381650395691395, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.001485146814957261, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0011739550391212106, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.012959633022546768, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.012959633022546768, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.1127585619688034, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1040927916765213, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.10126851499080658, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.09025681763887405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.05120323970913887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.04823216795921326, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.05787833407521248, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.05334765091538429, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.05215000733733177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.045363277196884155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.042691051959991455, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.029257619753479958, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.025357995182275772, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.02439289540052414, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.024153748527169228, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.014596827328205109, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.012374243699014187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.01229860819876194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.011183680966496468, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.011024978943169117, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.007475372403860092, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.007276097312569618, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0070772538892924786, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.004437045194208622, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.014596827328205109, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.014596827328205109, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.12445415556430817, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1068965345621109, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09449369460344315, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07721823453903198, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05578131228685379, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04627542570233345, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0765066146850586, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07000826299190521, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05837809666991234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.045017555356025696, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04238646849989891, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03920118510723114, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0339299812912941, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.027712440118193626, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.026055611670017242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01996045559644699, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015903888270258904, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015206621959805489, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014089584350585938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013123925775289536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011432123370468616, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01241681445389986, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009550151415169239, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00991777703166008, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014089584350585938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014089584350585938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.12253838777542114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.11566674709320068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.11362512409687042, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.10310880839824677, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.05443065986037254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.052368368953466415, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.06019720062613487, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.05562978982925415, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.05503960698843002, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.04976413771510124, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.04742538556456566, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03026498667895794, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.02638803794980049, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.025877783074975014, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.02575933374464512, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01514759287238121, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.013548407703638077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.013518712483346462, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01266742404550314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.012594372034072876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.008185639046132565, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.008535172790288925, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.008012532256543636, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.006150484550744295, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.013548407703638077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.013548407703638077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14831867814064026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1400604248046875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13761036098003387, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12487787753343582, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06593472510576248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06343719363212585, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.072757288813591, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06736364215612411, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06668438762426376, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06028593331575394, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.057320140302181244, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.036525338888168335, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03179444372653961, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03118015080690384, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.031036218628287315, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01818816177546978, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.015926750376820564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.015889007598161697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014813968911767006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.014725329354405403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009488426148891449, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.00945661123842001, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009271690621972084, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.006140672601759434, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014813968911767006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.014725329354405403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.13118980824947357, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.12158440053462982, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.11808991432189941, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.10842006653547287, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.05832447484135628, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.05521627515554428, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.06655053049325943, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.06112553924322128, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0594024583697319, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.05311783030629158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.05152687430381775, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.03357686474919319, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.02900930494070053, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.02780011296272278, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.027513733133673668, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.01680608280003071, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01453104242682457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01443177368491888, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01354710292071104, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.013372986577451229, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.00914478674530983, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.009217552840709686, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.008764284662902355, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.006520169321447611, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01453104242682457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01453104242682457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.02930586412549019, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.025580832734704018, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.023438770323991776, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.02038681134581566, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.012998834252357483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.011233661323785782, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.016995815560221672, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.01533274631947279, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.013510063290596008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.010980412364006042, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.010451202280819416, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.008608822710812092, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.007333097513765097, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.006285954732447863, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.006014962214976549, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.004342859145253897, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0033714701421558857, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.003251611487939954, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0029942661058157682, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0028235383797436953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.002354798838496208, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0023663612082600594, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0019798760768026114, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0016963735688477755, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.013510063290596008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.013510063290596008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.02939978428184986, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.02541029453277588, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.02300199866294861, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.019935399293899536, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.012797183357179165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.010895589366555214, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0167522095143795, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.015428922139108181, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.013347486965358257, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.010751000605523586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.010210358537733555, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.008410926908254623, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.00730849290266633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.006151227280497551, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.005854607559740543, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.004206436686217785, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0032393999863415956, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0031044892966747284, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.002850005403161049, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0026529966853559017, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.002213867846876383, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0022379823494702578, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0018204925581812859, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0015093133551999927, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.013347486965358257, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.013347486965358257, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.11678795516490936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.10672888904809952, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.10342152416706085, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.09157391637563705, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.053306445479393005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.04964819550514221, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.06053081899881363, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.055980127304792404, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.054478492587804794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.04664355143904686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.04365120083093643, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.030646037310361862, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.02665945701301098, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.025459442287683487, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.025143805891275406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.015299062244594097, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.01294692326337099, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.012845720164477825, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.011573239229619503, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01138258259743452, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.007867745123803616, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.007705739699304104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.007412832695990801, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.004761531483381987, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.01294692326337099, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.01294692326337099, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14184533059597015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11743608117103577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10664281249046326, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0871962457895279, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06353261321783066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.052759330719709396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08025246113538742, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07315447926521301, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06678660959005356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04894844442605972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04531954601407051, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.041446834802627563, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03556584566831589, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.031246913596987724, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03015190362930298, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021023569628596306, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0174570232629776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01707354374229908, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014905566349625587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014222332276403904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012022268027067184, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012849101796746254, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010679230093955994, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.01015893928706646, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014905566349625587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014905566349625587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14416450262069702, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.13602857291698456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13362377882003784, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12116718292236328, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06508241593837738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06254379451274872, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07185947895050049, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06654118746519089, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06584883481264114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.059352193027734756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.056428104639053345, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03630905598402023, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.031660277396440506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03102756477892399, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.030879797413945198, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.018119771033525467, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.016212107613682747, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.016171973198652267, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.015116183087229729, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015027232468128204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009682554751634598, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01016618125140667, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009463191032409668, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007259400561451912, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009682554751634598, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009682554751634598, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17084985971450806, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1612650603055954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.15844197571277618, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14372730255126953, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07708972692489624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07407770305871964, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08505471795797348, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0787927657365799, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07798978686332703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0702948272228241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06673794984817505, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.042795900255441666, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03727627918124199, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03652326762676239, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03634941205382347, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.021306023001670837, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.018581824377179146, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.018533192574977875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01722165010869503, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.017110319808125496, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011034199967980385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010902848094701767, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010766973719000816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.006893808953464031, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011034199967980385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011034199967980385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.14907720685005188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1372355967760086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.13277670741081238, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12052132189273834, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.06646987795829773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.06251199543476105, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.07612290978431702, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07008671760559082, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.06778907030820847, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.05981779843568802, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.05736730620265007, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.03844061121344566, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03325064852833748, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.031679343432188034, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03129921853542328, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.019202543422579765, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.016508836299180984, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.016370777040719986, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01524245273321867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01500921044498682, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.010336589999496937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.010434203781187534, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.009838714264333248, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.0072719864547252655, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.010336589999496937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.010336589999496937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.03630859777331352, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.03284765034914017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.030969364568591118, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.027427300810813904, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.016353946179151535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.014780615456402302, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02107807621359825, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.01838001050055027, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.016788072884082794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.014269601553678513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.013595939613878727, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.010658049024641514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.008781266398727894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.007872916758060455, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.007644992787390947, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.005358612630516291, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.004159857518970966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.004057885147631168, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.003760210471227765, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0036177202127873898, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.002882339060306549, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.002784586977213621, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0024748574942350388, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0019603127148002386, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.014269601553678513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.014269601553678513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.03626830503344536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.032564327120780945, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.030411427840590477, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.026813365519046783, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.016086969524621964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.014376404695212841, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.02008954808115959, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.01849849708378315, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.01655767671763897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013984177261590958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.013262521475553513, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0101016815751791, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.008785944432020187, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.007723155431449413, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.007453198544681072, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.005063709802925587, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00404775096103549, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.003918659407645464, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0036394288763403893, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0034660352393984795, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0026650717481970787, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.002698993543162942, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0023047872819006443, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.001824118779040873, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013984177261590958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013984177261590958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.12551936507225037, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.11621399968862534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.11316482722759247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.10134778171777725, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.05723559856414795, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.054002273827791214, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.06449136137962341, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.059592779725790024, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.05822078511118889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.050917889922857285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.04800647497177124, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.032594945281744, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.02833750657737255, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.027254413813352585, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.027004964649677277, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.016260480508208275, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.013823349960148335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.013733809813857079, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01251992117613554, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.012354237958788872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.008329370059072971, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.008107499219477177, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.007925374433398247, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0049176691100001335, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.013823349960148335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.013823349960148335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14574947953224182, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1206958070397377, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11167243123054504, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09630552679300308, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06460510939359665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05507013946771622, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07811105251312256, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07187499850988388, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06810206919908524, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05121878162026405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04781046509742737, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04006931558251381, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03467309847474098, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03139274939894676, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03058459423482418, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02010219544172287, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017024241387844086, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.016786938533186913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014563295990228653, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014034700579941273, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011075037531554699, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.011850625276565552, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010007367469370365, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008970632217824459, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014563295990228653, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014563295990228653, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14840272068977356, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1402919888496399, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13788022100925446, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12569434940814972, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06750058382749557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06491851806640625, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07450710237026215, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06897278130054474, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06826601922512054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06174548715353012, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.059055108577013016, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03771601989865303, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03284295275807381, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03220140188932419, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03204872086644173, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.018831171095371246, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.016787981614470482, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.016747694462537766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01567581295967102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015584597364068031, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010055538266897202, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010462788864970207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009831414557993412, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007409390993416309, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010055538266897202, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010055538266897202, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17737524211406708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1677473485469818, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16489030420780182, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1503870040178299, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08063143491744995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0775434598326683, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0888766497373581, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08236196637153625, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08155407011508942, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07375337928533554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07041187584400177, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04482650384306908, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.039027176797389984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.038262225687503815, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03807877004146576, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.022318284958600998, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.019462713971734047, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.01941291242837906, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.018079400062561035, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.017967019230127335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011543025262653828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011407961137592793, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011267443187534809, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007198195438832045, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011543025262653828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011543025262653828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.15521802008152008, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.14294828474521637, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.13850322365760803, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12527228891849518, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.06934948265552521, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.06531630456447601, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.07915258407592773, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07279455661773682, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07068410515785217, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06228001415729523, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.05953141301870346, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.039992690086364746, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.034546442329883575, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03304844722151756, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.032696206122636795, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.019992832094430923, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.017211023718118668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.017085473984479904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.015860386192798615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.015640251338481903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01080079935491085, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.010835535824298859, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.010331135243177414, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.0075447093695402145, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01080079935491085, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01080079935491085, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.03998773917555809, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.03640732914209366, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.03431953117251396, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.030505072325468063, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0180753692984581, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.016384800896048546, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02327318862080574, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02039581537246704, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.018512366339564323, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.01586543768644333, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.015128998085856438, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011779369786381721, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.009748314507305622, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.00871734507381916, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.008460510522127151, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.005912902764976025, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.004635754972696304, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00451551703736186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.004216249566525221, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.004054638557136059, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.003164144465699792, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0031423019245266914, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.002708777319639921, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0022475826554000378, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011779369786381721, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011779369786381721, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.037107594311237335, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.03364020213484764, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03120420314371586, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.027660047635436058, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.016530029475688934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.014705503359436989, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.021013889461755753, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.01940976083278656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.016948243603110313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.014494499191641808, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.013848131522536278, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0105621088296175, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.009203700348734856, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.00793763529509306, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.007614430505782366, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.005281582474708557, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.004165819380432367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004003205802291632, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0037745863664895296, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0035665850155055523, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.002791716018691659, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.002812751801684499, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0023806593380868435, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0018937208224087954, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.014494499191641808, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.014494499191641808, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.1388479471206665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.12893085181713104, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.12563128769397736, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1128474548459053, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.06359849125146866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.06008205935359001, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0719856396317482, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.06628216058015823, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0646933913230896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.05675734579563141, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.053647395223379135, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.03638839349150658, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0315098762512207, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03030831553041935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03002411685883999, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.018135547637939453, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.015337817370891571, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.015241359360516071, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.013925956562161446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.013735863380134106, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.009287863969802856, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00897233933210373, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.008806541562080383, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.005397043656557798, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.013925956562161446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.013925956562161446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16388732194900513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1380378156900406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12763720750808716, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10549794137477875, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07327877730131149, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0626823902130127, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08947388827800751, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08240257948637009, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07686666399240494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.057667315006256104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05295439809560776, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04594438150525093, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03983418270945549, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.035739362239837646, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.0347253791987896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023152854293584824, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01954335719347, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019200878217816353, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01670626923441887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0160529688000679, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012954600155353546, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013836379162967205, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011660819873213768, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010620751418173313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012954600155353546, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012954600155353546, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15136194229125977, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1432144045829773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.14076103270053864, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12853041291236877, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06915287673473358, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06652750819921494, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07630273699760437, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07069502025842667, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06994640827178955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06331062316894531, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.060621511191129684, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03861973434686661, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03365929797291756, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.032992251217365265, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03283236175775528, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019270969554781914, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017134560272097588, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017095543444156647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.015996281057596207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01589883863925934, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010211130604147911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01058354601264, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009977016597986221, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007378839422017336, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010211130604147911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010211130604147911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1851089596748352, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17526298761367798, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17231006920337677, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15737217664718628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08458617329597473, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08138145506381989, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09333328902721405, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08642944693565369, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08554656058549881, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07747183740139008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07410500198602676, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.047156427055597305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04102201387286186, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04020814597606659, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.040020402520895004, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023502252995967865, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020504916086792946, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020453333854675293, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019070828333497047, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.018949810415506363, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012225987389683723, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01209570374339819, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011937431991100311, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007738854270428419, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012225987389683723, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012225987389683723, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.16069112718105316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1478053629398346, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.14315494894981384, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12939319014549255, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07191090285778046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.06766054034233093, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08204639703035355, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07552679628133774, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07332152128219604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0644824430346489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06153501570224762, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.041472792625427246, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0358722060918808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03428439050912857, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.033913709223270416, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.020728347823023796, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0178682878613472, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01773872785270214, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.016449684277176857, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0162196084856987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011157345026731491, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01126801036298275, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01065278984606266, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.007868379354476929, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011157345026731491, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011157345026731491, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0522414892911911, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04828042909502983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.046455156058073044, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.041608262807130814, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02376868575811386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02213910035789013, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.028416775166988373, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.025583982467651367, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.024214666336774826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.021133411675691605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.020043374970555305, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01438361220061779, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.012200557626783848, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.011402490548789501, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.011208157986402512, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007212698459625244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005968417041003704, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005885137245059013, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005455404054373503, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005334373097866774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0038298736326396465, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.003838591743260622, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0034812085796147585, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0026796224992722273, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01438361220061779, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01438361220061779, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.04625537246465683, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.042558394372463226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0407244972884655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03634674474596977, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02077830396592617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.01922265812754631, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.02447480708360672, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0226603914052248, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.021191874518990517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.018415072932839394, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.017430350184440613, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012296590954065323, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010769005864858627, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009939256124198437, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.00973585806787014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006151722278445959, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005137152038514614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005046140868216753, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004667268600314856, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004538861569017172, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0032218119595199823, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003249736037105322, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0029472741298377514, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002150624291971326, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012296590954065323, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012296590954065323, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.14679434895515442, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.13738630712032318, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.13434764742851257, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1211429089307785, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.06734415888786316, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.06403272598981857, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.07556076347827911, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.06963764876127243, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.06835487484931946, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.060554757714271545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.05745282769203186, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.03820887953042984, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03310403227806091, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03207843005657196, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03180963546037674, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01904277503490448, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.016266336664557457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.016194863244891167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.014880423434078693, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.014722694642841816, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.009785112924873829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.009526011534035206, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.009364817291498184, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0058311717584729195, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.014880423434078693, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.014722694642841816, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16240189969539642, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1360931545495987, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12676548957824707, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10423661768436432, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07217139005661011, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0619979090988636, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08658996969461441, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0798175036907196, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07596717774868011, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.057096049189567566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05091863498091698, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04432092607021332, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0383308082818985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03487221151590347, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.034021444618701935, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022169049829244614, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.018581727519631386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01833098754286766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0157252699136734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015155995264649391, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012013411149382591, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012524740770459175, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010870334692299366, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009108392521739006, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012013411149382591, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012013411149382591, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14767573773860931, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.13985025882720947, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1374775767326355, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1258247345685959, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06786926835775375, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06529183685779572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07498206943273544, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06936933100223541, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06861847639083862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06219909340143204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.05967709422111511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0380442850291729, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0330982431769371, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.032437317073345184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03228039667010307, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.018999008461833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.01690099574625492, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.016853248700499535, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0157990250736475, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015704238787293434, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010131596587598324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010508785955607891, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009902154095470905, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007414395455271006, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010131596587598324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010131596587598324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1887277066707611, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17882712185382843, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17581161856651306, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16091468930244446, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08670434355735779, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08345015347003937, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09573756158351898, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0885881781578064, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08768298476934433, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07947197556495667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07616101205348969, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04849521070718765, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.042105212807655334, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.041273489594459534, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04107801988720894, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02418898046016693, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021104197949171066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021050162613391876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01965111866593361, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019525984302163124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012682614848017693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012528805993497372, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01238952949643135, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008122505620121956, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012682614848017693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012682614848017693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1609271615743637, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.14804276823997498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.14335975050926208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12977081537246704, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07220026105642319, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.06791161000728607, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0824030414223671, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07584688067436218, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07365197688341141, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06473642587661743, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06179186701774597, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0416407436132431, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0360843688249588, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03448757901787758, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0341094508767128, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.020880211144685745, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01803816854953766, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01790393702685833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01661725714802742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.016381075605750084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011335620656609535, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.011462659575045109, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.010831820778548717, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008092825300991535, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011335620656609535, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011335620656609535, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.049884747713804245, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.046251896768808365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04446931928396225, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.039856601506471634, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02277163416147232, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.021214943379163742, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.027429139241576195, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02458934858441353, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.023173116147518158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.020300714299082756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.019234947860240936, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013896306045353413, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.011735807172954082, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.010930636897683144, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01073413249105215, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.006969707552343607, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005729188211262226, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005643358454108238, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00524869654327631, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005127076990902424, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0037061695475131273, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0037009241059422493, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003350241808220744, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.002592726843431592, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013896306045353413, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013896306045353413, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.044053416699171066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.040635284036397934, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03872856870293617, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03451370447874069, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.019763214513659477, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.018219169229269028, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.023499468341469765, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.021830596029758453, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02015049010515213, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.017552515491843224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.016612479463219643, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011803265661001205, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010376798920333385, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009457152336835861, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009226536378264427, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00591423362493515, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.004901169799268246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004795877728611231, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004466956481337547, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004321653861552477, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003112749895080924, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0031411550007760525, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.002823319984599948, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0020915186032652855, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011803265661001205, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011803265661001205, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.15074877440929413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.14104586839675903, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.13794879615306854, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.12441293895244598, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0692649707198143, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.06583802402019501, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.07800546288490295, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.07165226340293884, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07024817913770676, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.06225098296999931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.05904799699783325, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.03944370523095131, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03406268730759621, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03298517316579819, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03273450583219528, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.019671181216835976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.016727447509765625, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.016639046370983124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.015287810936570168, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01512338686734438, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010077285580337048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.009782448410987854, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.009605970233678818, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.005975580308586359, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010077285580337048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010077285580337048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15871186554431915, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13654202222824097, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12795166671276093, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10710158199071884, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07151833176612854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06240428611636162, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08643515408039093, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07900728285312653, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0742567628622055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0575421042740345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05211399868130684, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04436603561043739, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03822746127843857, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.034882720559835434, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03405927121639252, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022532211616635323, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019091937690973282, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018801415339112282, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016551226377487183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01602933183312416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012869366444647312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013434666208922863, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01184004731476307, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010415975004434586, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012869366444647312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012869366444647312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.13095171749591827, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.12405651062726974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.12194430083036423, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.11162327975034714, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06092096492648125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.05861634761095047, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0675349161028862, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06236289069056511, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.061591651290655136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.055924899876117706, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.05378646403551102, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.034743357449769974, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.030575817450881004, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.029913512989878654, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.029775533825159073, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01766480877995491, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.016832081601023674, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.016796685755252838, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.015964720398187637, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015877941623330116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010297669097781181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012030419893562794, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010088765993714333, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010017316788434982, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010297669097781181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010297669097781181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1991885006427765, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18890251219272614, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1857127696275711, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1700279861688614, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09238629043102264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08889039605855942, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1022883951663971, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09452589601278305, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0934181660413742, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08471988886594772, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08142969757318497, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.052266381680965424, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04555707052350044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04461027309298515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04439530149102211, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.026170412078499794, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023897582665085793, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02383379451930523, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02245175465941429, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022325783967971802, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014393561519682407, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015738990157842636, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014077290892601013, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.012045703828334808, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014393561519682407, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014393561519682407, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.01343129575252533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.013287898153066635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.005311212502419949, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.005138100124895573, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.004840892739593983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.002187697682529688, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.01370551623404026, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.01307530328631401, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.004867331124842167, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.004750729538500309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.004851034842431545, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.004808113910257816, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.004654994700103998, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.002666766056790948, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0018683115486055613, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0026142459828406572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0017599983839318156, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0013411423424258828, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.0017473518382757902, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0013227390591055155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0017309979302808642, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.0017338137840852141, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.0012906990014016628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.001294120796956122, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.01343129575252533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.01343129575252533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.051417574286460876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04765220358967781, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04584473744034767, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04114321619272232, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.023571575060486794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.021966850385069847, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.027505025267601013, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.025417936965823174, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.023987699300050735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.020985741168260574, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.019953319802880287, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01393067091703415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.012127513997256756, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.011314088478684425, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.011114928871393204, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.006985431537032127, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005908919032663107, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005821482744067907, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005401429254561663, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005278600379824638, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0037027650978416204, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0037850162480026484, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0034290298353880644, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0026175568345934153, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01393067091703415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01393067091703415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.046025972813367844, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.04254082962870598, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04071199148893356, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.036355048418045044, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02086227759718895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.019338155165314674, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.024610474705696106, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.022786322981119156, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.021262837573885918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.018533246591687202, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.017604157328605652, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01241079531610012, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01084672287106514, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.00999682117253542, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009788017719984055, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006210344843566418, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00517946807667613, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00508533651009202, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0047149332240223885, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004585970193147659, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0032772531267255545, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003290571505203843, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0030113798566162586, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002200450748205185, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01241079531610012, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01241079531610012, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.18170049786567688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1700100153684616, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.16636550426483154, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.15025083720684052, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08418276906013489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08001268655061722, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0945093184709549, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08688855916261673, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08542431145906448, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.075644351541996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07172080874443054, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04785439372062683, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04135384410619736, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0401439405977726, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03984645754098892, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.023857152089476585, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.020310038700699806, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.020216412842273712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01853012852370739, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018342558294534683, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012214896269142628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.011781368404626846, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011687761172652245, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007081646006554365, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012214896269142628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012214896269142628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1277218908071518, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1027449443936348, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09276805073022842, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07892577350139618, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05669260397553444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04675406217575073, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07051418721675873, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.064928337931633, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06010939180850983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.043019361793994904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04092539846897125, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03634488582611084, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03143644705414772, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.02775099314749241, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.026822322979569435, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.018310900777578354, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015263568609952927, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014958130195736885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012811331078410149, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012199241667985916, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.010267121717333794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.010977663099765778, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009105957113206387, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00845372024923563, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014958130195736885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014958130195736885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14843334257602692, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1406809687614441, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13831676542758942, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1266247183084488, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06889207661151886, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06627199798822403, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07628527283668518, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0704394280910492, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06966880708932877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06311202049255371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06057662516832352, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03885462507605553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03376693278551102, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03308675438165665, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03292573615908623, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019436651840806007, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017402680590748787, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017362914979457855, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.016282953321933746, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016190871596336365, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010491513647139072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011056742630898952, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010254228487610817, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008054232224822044, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010491513647139072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010491513647139072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21001125872135162, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.19913654029369354, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19587676227092743, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1792478859424591, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0972420871257782, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09355015307664871, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10737243294715881, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0993366688489914, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09835280478000641, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0890122801065445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08531593531370163, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05444416403770447, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0472927987575531, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.046349965035915375, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.046126484870910645, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027159593999385834, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023631244897842407, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.023570651188492775, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02196016162633896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02182053215801716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01415802538394928, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013919729739427567, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013825427740812302, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008869301527738571, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01415802538394928, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01415802538394928, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.16701897978782654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.15274053812026978, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.14726340770721436, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.13305628299713135, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07505432516336441, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07006105035543442, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0866289809346199, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07964174449443817, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07674024254083633, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06683297455310822, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06379501521587372, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04370035603642464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03786033019423485, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03584606945514679, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.035359736531972885, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02196543477475643, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01864754594862461, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01847182586789131, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01706143282353878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.016757266595959663, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011931953020393848, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.011762700043618679, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011305088177323341, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008090176619589329, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011931953020393848, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011931953020393848, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.04204317554831505, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.03914155811071396, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.03741629421710968, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.03368895873427391, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.01932368054986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.017925353720784187, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02303716167807579, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02126535028219223, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.019649233669042587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.017301466315984726, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.0165537279099226, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011648685671389103, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.010149255394935608, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.009299739263951778, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.009093709290027618, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.005856277421116829, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.004891523160040379, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.004793211817741394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0044985683634877205, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.004369325004518032, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0031444625928997993, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0032058265060186386, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.002875011181458831, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.002246841788291931, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011648685671389103, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.011648685671389103, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.039486438035964966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0365791842341423, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.034764982759952545, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03116154670715332, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.017937757074832916, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.01652654819190502, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.021544668823480606, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.019996609538793564, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.01825200952589512, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.016004309058189392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.015273811295628548, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.010855548083782196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.009521872736513615, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.008621159009635448, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.008393042720854282, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.005447733215987682, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.004500761162489653, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004388723056763411, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004126445855945349, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.003981303423643112, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.002908222610130906, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.002936786971986294, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0026336675509810448, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.001993658719584346, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.010855548083782196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.010855548083782196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.16745899617671967, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.15657512843608856, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1530306190252304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.13815902173519135, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.07742416858673096, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0734681561589241, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.08737791329622269, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08012540638446808, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07854758203029633, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.06946638226509094, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.06592689454555511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04421523958444595, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.038108211010694504, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03690439835190773, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03660905733704567, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02202002704143524, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.018640901893377304, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.01854177936911583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01699220947921276, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.016807518899440765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011253520846366882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.010796837508678436, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.010709602385759354, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.006431464105844498, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011253520846366882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011253520846366882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14783501625061035, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1178777888417244, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10655910521745682, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08145453035831451, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06494785100221634, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0532611608505249, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08045631647109985, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07388344407081604, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06898330897092819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.047350384294986725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04241050407290459, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04129359498620033, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03606057167053223, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03206886723637581, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.031099550426006317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021158166229724884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.018194623291492462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.017897341400384903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015191963873803616, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014561433345079422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01231162529438734, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013529623858630657, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011104694567620754, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010963806882500648, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014561433345079422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014561433345079422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15067002177238464, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.14281198382377625, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.14042580127716064, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12857812643051147, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06993750482797623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06724952161312103, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07752050459384918, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07148969173431396, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07073793560266495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0640043169260025, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06148524582386017, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03942526504397392, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03418552502989769, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0334986075758934, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03333445265889168, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019709505140781403, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017439454793930054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.0173969604074955, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01627175509929657, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016173910349607468, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010538630187511444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010815728455781937, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010299750603735447, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00759773189201951, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010538630187511444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010538630187511444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21633005142211914, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20501819252967834, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20160214602947235, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18449318408966064, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10040229558944702, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0965263620018959, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11087924987077713, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10258433222770691, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1015419214963913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09181289374828339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08788817375898361, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05621883273124695, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.048853542655706406, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.047865837812423706, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04763586074113846, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028033478185534477, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02440151944756508, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024337053298950195, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02265479415655136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022506851702928543, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01458396203815937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014364853501319885, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014232905581593513, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009127777069807053, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01458396203815937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01458396203815937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17032836377620697, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1558084338903427, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.15006308257579803, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.13558518886566162, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07652400434017181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07137539982795715, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08868899196386337, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08140488713979721, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07818742096424103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06819361448287964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06509916484355927, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04482555761933327, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03874360769987106, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03656662628054619, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0360315777361393, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.022560473531484604, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019035639241337776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.018834080547094345, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01742899976670742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017098935320973396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012323001399636269, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012055558152496815, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011635829694569111, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00830464344471693, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012323001399636269, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012323001399636269, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0503678135573864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04679734259843826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04503801837563515, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04044761881232262, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02308494783937931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.021537132561206818, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.026897277683019638, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02489360421895981, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.023482469841837883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.020604895427823067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.01956985518336296, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01361354161053896, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.011875325813889503, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.011071654967963696, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.010876010172069073, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.006818382069468498, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005766069516539574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005679189693182707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005276755895465612, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005154840182512999, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0036043536383658648, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.003674339270219207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003337917849421501, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0025137849152088165, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01361354161053896, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01361354161053896, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.04557875543832779, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0421796515583992, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04039597138762474, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03612275421619415, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.020631544291973114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.019119950011372566, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.024319963529706, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.022588668391108513, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0209975503385067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.01835954189300537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.01741892099380493, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012232383713126183, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010725965723395348, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009860184043645859, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009648601524531841, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006121446844190359, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005080418195575476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004976375959813595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004624561872333288, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004488941747695208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0032039512880146503, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0031901963520795107, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0029306448996067047, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002065241103991866, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012232383713126183, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012232383713126183, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.15410147607326508, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.14417919516563416, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.14073950052261353, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.12696026265621185, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.07108619064092636, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.06744632869958878, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.08006349951028824, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.07366806268692017, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07214633375406265, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.06379687786102295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.06046858802437782, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04051916301250458, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03504292666912079, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03387538343667984, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03358924761414528, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02020176872611046, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.017110981047153473, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0170196034014225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.015606609173119068, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.015424199402332306, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010331661440432072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.009944317862391472, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.009841782040894032, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.005946815013885498, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010331661440432072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010331661440432072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16325624287128448, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13029678165912628, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11863765865564346, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09281937777996063, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07163691520690918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05929010361433029, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08773226290941238, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08061090111732483, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07639215141534805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05321967974305153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04669221118092537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04494953155517578, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03899136185646057, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.034954484552145004, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03396650776267052, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022780878469347954, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01919589377939701, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018924063071608543, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015892911702394485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015229031443595886, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012892557308077812, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013651126995682716, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011623531579971313, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010542547330260277, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012892557308077812, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012892557308077812, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1426912546157837, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.13520516455173492, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1328803151845932, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12167972326278687, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0663631483912468, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06380408257246017, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07337450236082077, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.06786506623029709, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06712592393159866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06070931628346443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.05818074196577072, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03731318563222885, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03244134038686752, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03177449479699135, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.031620364636182785, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.018643878400325775, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.016517754644155502, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.016472799703478813, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.015402483753859997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015308019705116749, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009919829666614532, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010215994901955128, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009681861847639084, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007132732775062323, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009919829666614532, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009919829666614532, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21325819194316864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20207233726978302, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19871094822883606, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18190310895442963, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09900357574224472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09517911821603775, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10924418270587921, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10117532312870026, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10014630109071732, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09054359048604965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08661926537752151, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05532198026776314, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04815753176808357, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04717990756034851, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.046950813382864, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02758054807782173, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024003244936466217, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02394033968448639, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0222735945135355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022128472104668617, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014260864816606045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014051680453121662, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013908174820244312, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008821130730211735, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014260864816606045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014260864816606045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17322134971618652, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.15821287035942078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1527586430311203, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1374526470899582, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07763062417507172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07258882373571396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0890011414885521, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0819244459271431, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07933934777975082, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06896699219942093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06556292623281479, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0449153371155262, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.038915082812309265, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03703911975026131, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03658636659383774, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0224821325391531, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019231796264648438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01907796785235405, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.017558064311742783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017277207225561142, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01206126343458891, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01205102726817131, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011465041898190975, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008264120668172836, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01206126343458891, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01206126343458891, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.04437548667192459, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04123743996024132, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.03940083831548691, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.035411227494478226, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02038515917956829, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.01886982098221779, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02414153516292572, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02243846282362938, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.020732618868350983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.018207870423793793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.017336534336209297, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0122338542714715, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.010716471821069717, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.009805346839129925, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.009578552097082138, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0061406237073242664, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005149330943822861, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005045201163738966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.004726359620690346, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.004587909206748009, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.003278521355241537, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0033735877368599176, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.002991091925650835, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0023574652150273323, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0122338542714715, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0122338542714715, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.04315486177802086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.040059879422187805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03801911696791649, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03409641236066818, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.019604427739977837, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.018034152686595917, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.02357396110892296, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.021951476112008095, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.019947251304984093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.017518822103738785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0166710764169693, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011875633150339127, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010437891818583012, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.00939717423170805, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009141789749264717, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.005942348390817642, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.004869496449828148, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004743208643049002, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004454142414033413, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004293656907975674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003133065765723586, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0031371803488582373, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.002818821696564555, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0020652024541050196, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011875633150339127, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.011875633150339127, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.14920270442962646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.13948684930801392, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.13609376549720764, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.12271531671285629, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.06876052916049957, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.06520551443099976, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.07800467312335968, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.07139696925878525, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.06983014196157455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.06164981424808502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.058545295149087906, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0394834466278553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03398043289780617, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03278627246618271, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03251679241657257, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.019701771438121796, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.016596511006355286, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.016499396413564682, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01512524951249361, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.014938349835574627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.010081945918500423, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.009686027653515339, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.009540271013975143, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.005835008807480335, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.014938349835574627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.014938349835574627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14872005581855774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11955447494983673, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10910086333751678, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08529321849346161, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06650307029485703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.055608659982681274, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08001187443733215, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07362055778503418, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06976833939552307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.047058217227458954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.043722573667764664, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04109662398695946, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.035558491945266724, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03235485404729843, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03156747668981552, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.020708424970507622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017577027902007103, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01734257861971855, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014074690639972687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013522489927709103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01155043300241232, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012253925204277039, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01052629854530096, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009344631806015968, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014074690639972687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014074690639972687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15582627058029175, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.14757053554058075, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1450490802526474, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1327071487903595, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07248765230178833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06966870278120041, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08005142211914062, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07415686547756195, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07332815229892731, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06626680493354797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06347758322954178, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04071784391999245, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03548740595579147, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03475179150700569, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.034580811858177185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02034076303243637, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.01813763566315174, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.018089618533849716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.016920559108257294, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016816498711705208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010821857489645481, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011326145380735397, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01056600734591484, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008030053228139877, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010821857489645481, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010821857489645481, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.20577768981456757, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1949353963136673, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19160887598991394, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.17522890865802765, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0954645499587059, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09175437688827515, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10532797127962112, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09761682152748108, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09659329801797867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08724724501371384, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08339667320251465, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05333714559674263, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04645339772105217, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04548869654536247, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.045257117599248886, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02657358907163143, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023124493658542633, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02305988036096096, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021436942741274834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021296514198184013, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013689221814274788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013513668440282345, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013338531367480755, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008442463353276253, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013689221814274788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013689221814274788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1585155874490738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1453450471162796, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.13983574509620667, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12577103078365326, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.071572445333004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.06671831011772156, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08388204127550125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.07643959671258926, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07309111207723618, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06361009180545807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06044471636414528, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.041838232427835464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0364033542573452, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.0343063585460186, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.033785562962293625, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02101358398795128, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.018007958307862282, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.017781918868422508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.016485923901200294, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.016139443963766098, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011306598782539368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.011610391549766064, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.010606123134493828, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008171257562935352, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011306598782539368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011306598782539368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.056131020188331604, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05233512446284294, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.050434619188308716, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.045464519411325455, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02577466145157814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02411540225148201, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.030244996771216393, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.027765996754169464, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.026190241798758507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02313418500125408, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.022152578458189964, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.015342353843152523, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013275478035211563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01239684596657753, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012179355137050152, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007693148218095303, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006482433527708054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006385546177625656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005965636111795902, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005831812042742968, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004086063243448734, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004160092677921057, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003799227299168706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0028870056848973036, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013275478035211563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013275478035211563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.046105753630399704, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.042892277240753174, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04074281454086304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03658289089798927, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.020873775705695152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.019227325916290283, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.025181373581290245, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.023379012942314148, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.021225405856966972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.01871558092534542, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.017843814566731453, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012648748233914375, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.011127526871860027, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.010004780255258083, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009733324870467186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006336606107652187, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005204809829592705, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005065552424639463, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00478077819570899, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004604095593094826, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0033610095269978046, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003376876236870885, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003015516558662057, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0022490641567856073, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012648748233914375, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012648748233914375, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.1691264510154724, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.15861645340919495, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1551922857761383, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.14034435153007507, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0782691240310669, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.07449808716773987, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.08822140842676163, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08074048906564713, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07936417311429977, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07047347724437714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.06696698069572449, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04470556601881981, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.038451213389635086, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03732505813241005, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.037047047168016434, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02229829877614975, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.018884077668190002, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.018792832270264626, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01725904643535614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01709126867353916, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011402319185435772, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.010958638042211533, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.010882219299674034, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0066024428233504295, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011402319185435772, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011402319185435772, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16784438490867615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13488535583019257, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.1219668909907341, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09476901590824127, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07416708767414093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06134648621082306, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09274337440729141, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08491900563240051, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07894279807806015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05351340398192406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05016239359974861, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.047875210642814636, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.041139114648103714, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03628818318247795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03505685552954674, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02432609349489212, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01992427557706833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019546551629900932, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01622576266527176, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015406291000545025, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01391174178570509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014282859861850739, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012384580448269844, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010966336354613304, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01391174178570509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01391174178570509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15008684992790222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1422020047903061, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13981182873249054, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12790298461914062, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06982570886611938, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06714270263910294, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07714325934648514, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07139651477336884, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07063595205545425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06387267261743546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06126314774155617, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03929683193564415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03424143046140671, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.033545296639204025, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.033386897295713425, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019673069939017296, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017636308446526527, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017586952075362206, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.016481293365359306, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016382835805416107, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010621638968586922, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011185521259903908, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010385198518633842, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008129595778882504, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010621638968586922, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010621638968586922, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19918331503868103, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18873722851276398, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18558712303638458, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16978947818279266, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09233474731445312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08876058459281921, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1018197238445282, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09436342120170593, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09338689595460892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08440797030925751, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0807594358921051, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.051581911742687225, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0449005626142025, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.043990641832351685, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.043774355202913284, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025703907012939453, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02238341234624386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02232365682721138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020764147862792015, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02062886953353882, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285405933856964, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013110855594277382, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012956646271049976, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00823935866355896, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285405933856964, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285405933856964, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17571642994880676, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16148272156715393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1563407927751541, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14104770123958588, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07907600700855255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07426853477954865, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09023135900497437, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08302277326583862, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08063362538814545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07057547569274902, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06714868545532227, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.045584358274936676, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03948992118239403, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03774397075176239, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03732879459857941, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02289426699280739, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019656162708997726, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.019510380923748016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018025990575551987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017767013981938362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012371735647320747, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012369709089398384, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011804740875959396, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00859805103391409, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012371735647320747, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012371735647320747, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06580532342195511, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06145638972520828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05953175202012062, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05371629446744919, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030206605792045593, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028440503403544426, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.034726161509752274, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03208788111805916, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03068803995847702, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02714947983622551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.025867560878396034, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.017571449279785156, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015311811119318008, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014480030164122581, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014278028160333633, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008798763155937195, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007534883450716734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007449509110301733, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006930880714207888, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006805852055549622, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004651370458304882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004756620619446039, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004374881274998188, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003264863044023514, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014480030164122581, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014480030164122581, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05519775673747063, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.051435522735118866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04944440722465515, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.0444924496114254, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.025020863860845566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02335960604250431, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029251689091324806, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.027136534452438354, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.025421366095542908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02246585115790367, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02135758474469185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014718287624418736, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.012900453060865402, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01196671836078167, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011731637641787529, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007374664302915335, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0061743310652673244, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00607083085924387, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005662813317030668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005521472077816725, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003863771678879857, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003876921720802784, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.00356475031003356, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002554032951593399, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014718287624418736, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014718287624418736, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.1665402203798294, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.15620502829551697, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1529114842414856, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.13830457627773285, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.07676351070404053, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0730951800942421, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.08623456954956055, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0791180357336998, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07784518599510193, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.06919428706169128, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.06578145176172256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04358989745378494, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03760680556297302, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03655890002846718, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.036303289234638214, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.021737439557909966, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0184844471514225, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.018405187875032425, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0169072262942791, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.016751009970903397, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011122379451990128, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.010711554437875748, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.010644722729921341, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.006444771774113178, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011122379451990128, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011122379451990128, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18482255935668945, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.15626254677772522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.14603056013584137, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.12111770361661911, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08306117355823517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.07191599905490875, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09897001087665558, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09112516045570374, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.086689293384552, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.06555631011724472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05915452539920807, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05070776492357254, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04390156641602516, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04028460010886192, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03941553086042404, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0255286768078804, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021693052724003792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.021418530493974686, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018433749675750732, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01785103976726532, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0141838900744915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014823388308286667, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013028020970523357, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011109407991170883, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0141838900744915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0141838900744915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15313898026943207, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.14504364132881165, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.14255450665950775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.13040007650852203, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07131265848875046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06853269785642624, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07883097976446152, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0729450210928917, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0721491351723671, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06517857313156128, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.062447529286146164, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.040109843015670776, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.034917913377285004, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0342012494802475, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03403182327747345, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02005799114704132, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017849234864115715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.01780221424996853, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01664389669895172, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01653849333524704, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01071830652654171, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011138442903757095, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01046738587319851, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007884623482823372, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01071830652654171, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01071830652654171, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2106989622116089, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1996103972196579, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19626693427562714, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.17943106591701508, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09787507355213165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09405229985713959, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10794679075479507, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10002969950437546, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0989992618560791, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08941707760095596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08547817170619965, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05469324812293053, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.047620829194784164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.046643707901239395, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.046410348266363144, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027257008478045464, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023712674155831337, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02364756539463997, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021983850747346878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021837856620550156, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014068291522562504, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013856944628059864, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013718619011342525, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008660122752189636, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014068291522562504, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014068291522562504, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1792377382516861, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16435196995735168, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.15897530317306519, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14310288429260254, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08059940487146378, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0755983218550682, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09186412394046783, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08478037267923355, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08225597441196442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0717354491353035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06812506914138794, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.046362511813640594, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0402793288230896, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.038448266685009, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03801342099905014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02316940948367119, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019961092621088028, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01980935037136078, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018251337110996246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01798096112906933, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012307226657867432, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012484204955399036, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01171489991247654, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008570870384573936, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012307226657867432, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012307226657867432, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06140119209885597, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05725586786866188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05527770146727562, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04983891174197197, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.028168104588985443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.026422884315252304, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.032782312482595444, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.030136242508888245, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02863067202270031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.025268085300922394, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024152567610144615, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.016581207513809204, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014393934980034828, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013520496897399426, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013311331160366535, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00831852201372385, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007051093503832817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006958302576094866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006482021417468786, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006351012736558914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004410774912685156, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.00448074284940958, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00412943959236145, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0030837717931717634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014393934980034828, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014393934980034828, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05132431164383888, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.047719087451696396, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04555889219045639, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.040886614471673965, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.023257579654455185, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.021542079746723175, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.027643242850899696, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.025698848068714142, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02365707792341709, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.020853696390986443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.019838646054267883, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01390848495066166, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.012232346460223198, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011150352656841278, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01089018676429987, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006978095509111881, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005804840475320816, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00567502761259675, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00532776303589344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005160473752766848, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003696607192978263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0037402212619781494, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0033653636928647757, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0025237088557332754, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01390848495066166, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01390848495066166, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20025914907455444, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18804962933063507, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18421117961406708, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16693349182605743, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09309832006692886, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08875390887260437, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10450609773397446, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09585341811180115, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09437936544418335, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08397175371646881, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07986873388290405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.052916478365659714, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04564996436238289, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04440613463521004, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.044105060398578644, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026403067633509636, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022442664951086044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022342294454574585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020551081746816635, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02036285027861595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01350143738090992, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012991170398890972, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012930933386087418, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0077947769314050674, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01350143738090992, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01350143738090992, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15995174646377563, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12304957956075668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10893230885267258, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08437858521938324, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07068342715501785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05543837696313858, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0890832245349884, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08094454556703568, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07523790746927261, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.049885742366313934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04655597731471062, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04582953453063965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.039311546832323074, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03472477197647095, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03356919437646866, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02348322421312332, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019285788759589195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018937144428491592, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015661858022212982, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014879817143082619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013742540962994099, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014049204997718334, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012328268960118294, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011014099232852459, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014879817143082619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014879817143082619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1501365453004837, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1421617567539215, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13972437381744385, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12780536711215973, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06996675580739975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06722322851419449, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.077265165746212, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0715533122420311, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07078851014375687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06390900909900665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06119617074728012, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.039344944059848785, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03425396978855133, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03354211151599884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03337640315294266, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019658630713820457, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017492420971393585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017449282109737396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.016307203099131584, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016205966472625732, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010482179932296276, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010901971720159054, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010233676061034203, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007706119678914547, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010482179932296276, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010482179932296276, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2113640159368515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20015659928321838, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19678793847560883, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18000324070453644, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0982341393828392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09438642859458923, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10838092863559723, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10041752457618713, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09937703609466553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08971435576677322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0857761949300766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05491674318909645, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04781368747353554, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04682942107319832, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04659667983651161, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027371590957045555, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023824656382203102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.023759860545396805, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022081423550844193, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021932872012257576, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01414682250469923, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013946150429546833, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01379330549389124, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008752284571528435, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01414682250469923, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01414682250469923, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17872415482997894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1638769507408142, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.15840686857700348, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1426784247159958, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08036793023347855, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07532927393913269, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0918324738740921, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08469800651073456, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08206400275230408, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07157200574874878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06801361590623856, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.046314965933561325, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0402011014521122, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03831775486469269, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03786219283938408, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.023138215765357018, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01983812265098095, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.019679445773363113, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018130162730813026, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01784852333366871, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012300788424909115, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012343873269855976, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011695705354213715, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00837316457182169, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012300788424909115, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012300788424909115, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05818590521812439, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.054339658468961716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05231412872672081, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04721660539507866, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.026750678196549416, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.025023572146892548, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03128634765744209, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.028926879167556763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.027178557589650154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02405509725213051, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02298915758728981, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.015849966555833817, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013824597001075745, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.012863053940236568, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012628893367946148, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007951891049742699, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006745306774973869, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006638170685619116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006220270413905382, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0060757980681955814, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004241738934069872, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004363678395748138, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003940306603908539, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003058036556467414, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013824597001075745, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013824597001075745, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05376517027616501, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.050108566880226135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04775574803352356, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.042994000017642975, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.024425748735666275, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02260708436369896, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029217546805739403, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.027071012184023857, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.024805892258882523, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.021931719034910202, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.020930474624037743, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01467362605035305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.012860618531703949, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011691414751112461, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011408450081944466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007350491359829903, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006069667637348175, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005927332676947117, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0055788131430745125, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0053952354937791824, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0038978566881269217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0038991712499409914, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0035433657467365265, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0025924283545464277, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01467362605035305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01467362605035305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.18585509061813354, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.17451347410678864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.17096850275993347, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.15476731956005096, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08605650067329407, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08200183510780334, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09678583592176437, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.088715098798275, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08728084713220596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07764029502868652, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07385897636413574, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.049019668251276016, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04220704734325409, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04103447496891022, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04073202982544899, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.024439118802547455, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.020733363926410675, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.020635047927498817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.018991773948073387, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018808506429195404, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486857362091541, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.011994482018053532, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011932815425097942, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00717097707092762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486857362091541, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486857362091541, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18336525559425354, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1447409987449646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12927278876304626, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10231561213731766, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0813722163438797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06528209149837494, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.1018918976187706, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09330660104751587, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08610308170318604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.056983426213264465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.054753199219703674, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05228291451931, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.045011114329099655, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03968147933483124, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.038323186337947845, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02638678252696991, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021646667271852493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02118702605366707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01726500131189823, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.016332212835550308, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014756852760910988, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015379288233816624, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01306798867881298, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0116475410759449, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014756852760910988, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014756852760910988, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.16374757885932922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1550288200378418, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1523490697145462, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.13931773602962494, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07644716650247574, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0734197199344635, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08450974524021149, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07821698486804962, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07734600454568863, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06981194764375687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06683465093374252, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.043104138225317, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.037516891956329346, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.036733563989400864, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03654983267188072, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.021571315824985504, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.019277554005384445, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.01922835037112236, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.017981359735131264, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.017868321388959885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011611800640821457, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012177086435258389, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011341771110892296, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008793625049293041, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011611800640821457, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011611800640821457, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.214853435754776, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20341376960277557, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19995933771133423, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18284648656845093, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09994970262050629, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09599051624536514, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11031554639339447, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10220935195684433, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10111895948648453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09124229848384857, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08716727048158646, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.055907197296619415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.048671092838048935, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.047653865069150925, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0474110022187233, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027875304222106934, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024237453937530518, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024171600118279457, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022454621270298958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022303761914372444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014400087296962738, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014180702157318592, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014037739485502243, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008882648311555386, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014400087296962738, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014400087296962738, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18334585428237915, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16779503226280212, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1620865911245346, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14583776891231537, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08240880072116852, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07715096324682236, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09427819401025772, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08687484264373779, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0841573104262352, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07324827462434769, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06948722153902054, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04754821956157684, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04125538468360901, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03931422531604767, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03885336220264435, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02378949150443077, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020420460030436516, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0202583409845829, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018653854727745056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018364083021879196, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012718768790364265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012797697447240353, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012102584354579449, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008791300468146801, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012718768790364265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012718768790364265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06325258314609528, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05897831544280052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.056900277733802795, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.051298607140779495, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02910701557993889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.027256429195404053, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03386455029249191, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03122934140264988, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02957507036626339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02610122039914131, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024902749806642532, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.017160866409540176, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014924166724085808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013974923640489578, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013742869719862938, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008602489717304707, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007286796346306801, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00718326261267066, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006695738527923822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0065512889996171, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004553670063614845, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004641733132302761, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0042421515099704266, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003194832243025303, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014924166724085808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014924166724085808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05637504160404205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05238158255815506, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05005283281207085, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04495128616690636, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02555139735341072, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.023683909326791763, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.030285056680440903, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02810884639620781, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.025983009487390518, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.022844625636935234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.021716149523854256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015238153748214245, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0133545882999897, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01222245767712593, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011946137063205242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00761442631483078, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006322087254375219, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006186434533447027, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005785530898720026, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005610758438706398, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003997191321104765, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004014355130493641, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003636004403233528, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0026434888131916523, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0133545882999897, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0133545882999897, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.17052370309829712, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.15963834524154663, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1560170203447342, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1410241276025772, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.07871512323617935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.07478789240121841, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.08885759860277176, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0814271867275238, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.07983984798192978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07084818184375763, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.06730716675519943, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04499409347772598, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.03874887526035309, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03752366453409195, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.037225350737571716, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0224311426281929, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.019006872549653053, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.018917730078101158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.017397956922650337, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01720479503273964, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011496398597955704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.011106124147772789, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.010937693528831005, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00676531158387661, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011496398597955704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011496398597955704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1450660675764084, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12106823921203613, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11262071877717972, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0936770811676979, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0653822124004364, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05601819232106209, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07803818583488464, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07171057909727097, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06828948110342026, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05011715367436409, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04668552801012993, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.040247101336717606, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.034767866134643555, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03189434856176376, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.031194059178233147, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.020407959818840027, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017436206340789795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01722371205687523, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014657072722911835, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01419568620622158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011626893654465675, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012201888486742973, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010748793371021748, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00944369938224554, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014657072722911835, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014657072722911835, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.14832988381385803, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1404254287481308, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1380033642053604, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1262226402759552, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06919044256210327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06644169241189957, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07649756968021393, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07079358398914337, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07001219689846039, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06316252797842026, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06049022823572159, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.038973111659288406, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.033902376890182495, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.033180493861436844, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03301726281642914, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01948067545890808, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017305200919508934, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017259584739804268, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01612342707812786, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01601788029074669, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01040598377585411, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010781754739582539, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010159008204936981, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007615064736455679, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01040598377585411, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01040598377585411, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.20656676590442657, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.19566760957241058, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19234508275985718, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1759018898010254, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09610719978809357, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09231740981340408, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10608971118927002, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09829355776309967, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09722916781902313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08774244785308838, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08386903256177902, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0537538081407547, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.046804457902908325, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04582112282514572, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04558918997645378, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.026800911873579025, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.0233046505600214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.023238416761159897, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021590184420347214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021443845704197884, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01383811142295599, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013638925738632679, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013482494279742241, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008537650108337402, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01383811142295599, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01383811142295599, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1726110279560089, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.15803495049476624, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.15268033742904663, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.13796798884868622, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07760539650917053, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07262831926345825, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08898826688528061, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08192411810159683, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07931719720363617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0691617950797081, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06588210165500641, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.044939130544662476, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03896598517894745, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.037081312388181686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03662632033228874, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.022541051730513573, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01933351904153824, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.019178008660674095, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.017708688974380493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017427846789360046, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012153028510510921, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012222676537930965, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011538874357938766, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008516921661794186, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012153028510510921, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012153028510510921, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06336608529090881, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05921129509806633, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.057158213108778, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05165553465485573, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.029155274853110313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.027349472045898438, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03375832363963127, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.031281206756830215, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.029607215896248817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02623695693910122, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.025026002898812294, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01709049940109253, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014931789599359035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013990094885230064, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013762549497187138, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008564283140003681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0072958627715706825, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007192951161414385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0067216865718364716, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006580118089914322, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004535492043942213, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004648388829082251, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0042321812361478806, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003205834189429879, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014931789599359035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014931789599359035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05720981955528259, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05334421992301941, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0509476475417614, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04593999683856964, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02605573646724224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02415580302476883, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03100481815636158, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.028765371069312096, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.026453513652086258, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02342092990875244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022368567064404488, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015583622269332409, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013668349012732506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01246123667806387, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012169567868113518, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007804175838828087, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0064391084015369415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006293876096606255, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005915903951972723, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005725481081753969, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004110249690711498, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004087909124791622, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003744266228750348, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002663129009306431, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013668349012732506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013668349012732506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2039429098367691, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1917538344860077, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18788555264472961, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1704472452402115, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09473004937171936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09030143171548843, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10645101964473724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09748813509941101, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09603535383939743, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08551899343729019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08146340399980545, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05391794443130493, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04641243815422058, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.045133452862501144, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04483617842197418, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026893218979239464, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022818127647042274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022725824266672134, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020920833572745323, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020723259076476097, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013758232817053795, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013190324418246746, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013150286860764027, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007898638024926186, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013758232817053795, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013758232817053795, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1651892513036728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13079030811786652, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11780392378568649, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09766831248998642, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07341140508651733, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05981913208961487, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09109426289796829, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0829898938536644, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07752366364002228, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0548955462872982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.049389585852622986, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04665042459964752, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.040235161781311035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03593258559703827, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.0348573699593544, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023739250376820564, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019852306693792343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019509749487042427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01655912771821022, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01585432142019272, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013673207722604275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014269964769482613, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01232416182756424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011121373623609543, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013673207722604275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013673207722604275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15318997204303741, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.14511176943778992, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1426321119070053, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1305084526538849, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07155930995941162, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06875140219926834, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07904429733753204, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0732010155916214, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07240457832813263, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06537231802940369, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06262160837650299, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.040297120809555054, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03508321940898895, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.034351471811532974, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.034181151539087296, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.020144252106547356, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.01795738749206066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.0179123692214489, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.016743648797273636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.016639888286590576, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010776886716485023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011253639124333858, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01052350364625454, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008019886910915375, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010776886716485023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010776886716485023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21147121489048004, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20038524270057678, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19702711701393127, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18035270273685455, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09850513190031052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09462431818246841, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10873467475175858, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10070975124835968, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09964532405138016, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08998767286539078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08605300635099411, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.055145278573036194, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04797598347067833, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04697906970977783, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.046743039041757584, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027485867962241173, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02390945889055729, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.023844294250011444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022160837426781654, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02201019413769245, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014224806800484657, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01400759257376194, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013868747279047966, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00880351196974516, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014224806800484657, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014224806800484657, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17109191417694092, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.15704798698425293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1517992466688156, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.13732227683067322, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07724831253290176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07235632836818695, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08866039663553238, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08157122880220413, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07891418784856796, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06895001977682114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06574620306491852, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04476229101419449, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03885038197040558, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03695116564631462, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.036501672118902206, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0224753525108099, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019314220175147057, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.019153691828250885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01771390438079834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01743398979306221, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0121524753049016, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012267461977899075, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011550957337021828, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008606554009020329, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0121524753049016, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0121524753049016, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06593732535839081, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06180097162723541, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05978984013199806, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05420849844813347, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030516453087329865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028724009171128273, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03527460992336273, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.032542500644922256, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.030970167368650436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027538076043128967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.026370126754045486, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01789330691099167, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015577772632241249, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014664256945252419, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014446261338889599, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00897209718823433, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007663678377866745, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007566271815448999, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007076811511069536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006941951345652342, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004769664257764816, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004891437478363514, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0044760312885046005, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00340424757450819, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014664256945252419, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014664256945252419, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06008575111627579, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05613941326737404, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05378354340791702, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04861129820346832, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.027453992515802383, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02556493505835533, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03261244669556618, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.030196264386177063, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.027893055230379105, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.024766774848103523, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.023745672777295113, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01642824336886406, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014354320243000984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013174953870475292, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012879550457000732, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008231686428189278, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0068152244202792645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006674275267869234, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0062820701859891415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006093088537454605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004338069818913937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004334073048084974, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003980194218456745, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0028629887383431196, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014354320243000984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014354320243000984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21892763674259186, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20605847239494324, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20182204246520996, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18363964557647705, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10193824768066406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0973593145608902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11458788812160492, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10496973991394043, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10334937274456024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09227076172828674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08812322467565536, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.058062463998794556, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04998033493757248, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0486287996172905, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04831774905323982, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028947316110134125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0245718602091074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02446776255965233, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022547487169504166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02234899252653122, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014790171757340431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014185437932610512, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014157898724079132, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008467432111501694, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014790171757340431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014790171757340431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1740793138742447, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12067066133022308, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0996972844004631, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08589667081832886, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07516053318977356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05303660035133362, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09579181671142578, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08811436593532562, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08169661462306976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04869022220373154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04924742132425308, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.049268826842308044, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.042602431029081345, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03686101362109184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.035409051924943924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02505192533135414, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.020375801250338554, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019969381392002106, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01580037549138069, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014757834374904633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014350143261253834, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014877203851938248, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012572340667247772, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011456207372248173, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014757834374904633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014757834374904633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.16612303256988525, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.15741898119449615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.15476202964782715, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14168202877044678, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07765064388513565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07462374866008759, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08590064942836761, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07940448075532913, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07856424152851105, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07097475230693817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06807338446378708, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.043777137994766235, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.038066472858190536, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03728805482387543, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.037099696695804596, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02192872017621994, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.019511962309479713, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.019460856914520264, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.018201177939772606, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.018091563135385513, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0118021285161376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012244748882949352, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01153158862143755, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008756110444664955, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0118021285161376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0118021285161376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21453796327114105, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2033628225326538, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19997261464595795, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18317370116710663, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10001406073570251, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09610595554113388, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11045026779174805, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1022394523024559, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10117963701486588, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09141068160533905, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08751111477613449, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05606774240732193, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0487271286547184, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04772278293967247, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04748367890715599, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.027956563979387283, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024337420240044594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024269498884677887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022577296942472458, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02242943085730076, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014536363072693348, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014337521977722645, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01417841762304306, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009125741198658943, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014536363072693348, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014536363072693348, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18111304938793182, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1666891723871231, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16139666736125946, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14644768834114075, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08165985345840454, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07676731795072556, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09350211918354034, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08596229553222656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08333558589220047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07322878390550613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06999068707227707, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04709950089454651, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.040853679180145264, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.038975462317466736, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.038523994386196136, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.023618288338184357, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02022734098136425, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020066769793629646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018588058650493622, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0183065477758646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01270189881324768, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012633899226784706, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012117843143641949, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00864700973033905, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01270189881324768, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01270189881324768, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06444700062274933, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0604957714676857, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.058391034603118896, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05298471078276634, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.029832463711500168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028032541275024414, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.034660033881664276, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03208734467625618, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.030264781787991524, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.026958823204040527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.0258000697940588, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01755119115114212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015330539084970951, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014340012334287167, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014100181870162487, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008813481777906418, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007503442466259003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007391742896288633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006938359700143337, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006790310610085726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0047119613736867905, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004816481377929449, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004401002079248428, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003354995744302869, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014340012334287167, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014340012334287167, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06105395033955574, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05716338008642197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.054845210164785385, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04960905387997627, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02802298218011856, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02611684985458851, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03312450647354126, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.030646806582808495, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.028433799743652344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02527005225419998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.024235667660832405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0166641678661108, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014607702381908894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013427887111902237, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013133181259036064, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008361047133803368, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0069351596757769585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00678849034011364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006388180889189243, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006199833936989307, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004408787004649639, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004377848468720913, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004056904930621386, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002869129180908203, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014607702381908894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014607702381908894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20426031947135925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19229407608509064, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18834900856018066, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17143398523330688, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0951048731803894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09074875712394714, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10702668875455856, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09812776744365692, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0963846743106842, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08611903339624405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08212294429540634, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05421123281121254, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04676603525876999, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.045390285551548004, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04506553336977959, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027040230110287666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02299227938055992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022877847775816917, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021121511235833168, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020919611677527428, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013849291950464249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013378537259995937, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013237876817584038, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00811195932328701, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013849291950464249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013849291950464249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17145246267318726, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12029068917036057, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10108689963817596, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0895979180932045, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07473617047071457, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.052475329488515854, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09539253264665604, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08585219830274582, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08048808574676514, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04999684914946556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04977431520819664, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.049279648810625076, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04200123995542526, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03711868077516556, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.035866376012563705, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02650586888194084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021204771474003792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02088235318660736, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.017137929797172546, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.016333142295479774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.017222389578819275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.016001787036657333, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.015957487747073174, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013063850812613964, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013063850812613964, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013063850812613964, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17572490870952606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1665593385696411, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1637512445449829, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14995534718036652, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08212345838546753, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07893048971891403, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0908549502491951, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08398832380771637, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08307841420173645, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07506326586008072, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07198963314294815, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04636438935995102, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04024588316679001, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03941884636878967, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03922220319509506, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023171259090304375, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020582003518939018, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020531360059976578, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019193870946764946, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01907327026128769, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012432748451828957, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012861798517405987, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012148790992796421, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009134853258728981, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012432748451828957, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012432748451828957, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2220536768436432, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21047624945640564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20698724687099457, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18953414261341095, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10354653000831604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09948515146970749, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11442460119724274, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10584554076194763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10474148392677307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09461860358715057, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09061823040246964, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.058105021715164185, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05046216771006584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04941738769412041, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.049170397222042084, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028984282165765762, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025203851982951164, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025135207921266556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023382337763905525, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02322477661073208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015115773305296898, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01485972199589014, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01474692951887846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009468309581279755, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01485972199589014, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01474692951887846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18603113293647766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17112568020820618, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.165479838848114, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15011779963970184, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08391397446393967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0787786915898323, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09617406129837036, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08851458132266998, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08563095331192017, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07520773261785507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07188134640455246, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.048484284430742264, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.042096275836229324, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04007914662361145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.039599236100912094, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024310724809765816, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02086210623383522, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02068888582289219, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019182506948709488, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018880119547247887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01307605765759945, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01313450001180172, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012422041967511177, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009084616787731647, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01307605765759945, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01307605765759945, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05740458145737648, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.053774040192365646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05159994214773178, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.046713292598724365, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02649921551346779, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.024726692587137222, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.031255077570676804, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.028941839933395386, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.026900701224803925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02391551434993744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.022885829210281372, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01582859642803669, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013841635547578335, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.012752039358019829, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012485633604228497, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00794502068310976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006693246774375439, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0065661282278597355, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006187566556036472, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00602506659924984, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0042451051995158195, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004358647391200066, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0039022532291710377, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0030481484718620777, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013841635547578335, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013841635547578335, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05408932641148567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.050477366894483566, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04785005748271942, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04308631271123886, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.024626675993204117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.022654497995972633, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029989294707775116, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02781989425420761, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02500115893781185, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.022183211520314217, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02129814215004444, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01503835991024971, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013235647231340408, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011821379885077477, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011462360620498657, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007544317282736301, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006149207707494497, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005968442652374506, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005670685321092606, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005452049896121025, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003995812498033047, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00401389179751277, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0035684872418642044, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002673553302884102, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013235647231340408, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013235647231340408, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20050038397312164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18869498372077942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1848495602607727, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16798880696296692, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09328994899988174, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08899550139904022, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1052531749010086, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09627261757850647, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09459888190031052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08444869518280029, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0804830938577652, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05337097868323326, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.045860711485147476, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04451664537191391, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04418516159057617, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02658412791788578, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022509833797812462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022410577163100243, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02067475952208042, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020461585372686386, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013593041338026524, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013057348318397999, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012951355427503586, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00785956159234047, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013593041338026524, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013593041338026524, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18307384848594666, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.14483310282230377, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.13216103613376617, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09241782873868942, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08196656405925751, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06799374520778656, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09775999933481216, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08944915980100632, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08581271022558212, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.055426742881536484, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04888460412621498, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05043193697929382, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.043378621339797974, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04004799947142601, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.039251863956451416, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.025614023208618164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021980740129947662, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02176392264664173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.017302457243204117, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01674518920481205, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014718220569193363, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01546047255396843, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013680406846106052, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012112769298255444, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014718220569193363, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014718220569193363, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1814626306295395, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1719822734594345, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16911087930202484, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1549018919467926, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08477415889501572, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08145853132009506, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09370088577270508, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08666010200977325, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08575411885976791, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07749740034341812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07428410649299622, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04777538403868675, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04150452837347984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.040652867406606674, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04045620933175087, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023873288184404373, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02117166481912136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021116850897669792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0197323989123106, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019609685987234116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012745625339448452, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013146083801984787, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012453209608793259, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009242726489901543, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012745625339448452, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012745625339448452, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2230280488729477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2114090472459793, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20792491734027863, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1904759407043457, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1039789617061615, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09994570165872574, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11486198753118515, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10628902912139893, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10520041733980179, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09507783502340317, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09100644290447235, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0583442822098732, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.050675224512815475, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.049634188413619995, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04938514530658722, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02909719944000244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02532789297401905, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025258565321564674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023507218807935715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02335096336901188, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01517278142273426, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01494823582470417, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014804501086473465, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009547297842800617, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01494823582470417, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014804501086473465, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1923440396785736, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17700593173503876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17123349010944366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15536871552467346, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08667988330125809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0814303308725357, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09928914904594421, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0913655012845993, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08845776319503784, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07775754481554031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0743362233042717, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04997323825955391, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04336608573794365, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04131532460451126, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04082276299595833, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02502257749438286, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02134443074464798, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02116737887263298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019608618691563606, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019300024956464767, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01336611621081829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01321598794311285, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012719402089715004, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008875931613147259, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01336611621081829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01336611621081829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05446160212159157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05106331408023834, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.048888664692640305, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04433578625321388, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.025197289884090424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.023473845794796944, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02979443036019802, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0276862271130085, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.025583086535334587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02277410589158535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02179650403559208, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.015083497390151024, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013261831365525723, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.012139418162405491, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.011867889203131199, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007577941752970219, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006408241577446461, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006277176551520824, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005938203539699316, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005772797390818596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004077413119375706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004231972619891167, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0037423076573759317, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0030044757295399904, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013261831365525723, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013261831365525723, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05330219119787216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.049917276948690414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04735655337572098, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04280273988842964, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.024430643767118454, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02252870984375477, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029769111424684525, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02758360654115677, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.024817822501063347, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02207922376692295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02123197540640831, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014984296634793282, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013136581517755985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011751147918403149, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011396926827728748, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007512256968766451, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0061216349713504314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005945604760199785, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005658568348735571, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005438178312033415, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004005372058600187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004004007205367088, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0036053683143109083, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0026859138160943985, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013136581517755985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013136581517755985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20699357986450195, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19488206505775452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19094231724739075, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17367781698703766, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09648428857326508, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0920487642288208, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1086643636226654, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09953412413597107, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09777769446372986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08734391629695892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08339691162109375, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05514112114906311, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04745592176914215, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04604088515043259, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.045719750225543976, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027496101334691048, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023322420194745064, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023217063397169113, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02143704518675804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021222323179244995, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014088979922235012, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013585027307271957, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013429272919893265, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008241579867899418, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014088979922235012, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014088979922235012, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17692618072032928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1281866878271103, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11025480180978775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09039031714200974, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07658408582210541, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05661967396736145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09618760645389557, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08838660269975662, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08309804648160934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0524301752448082, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04981768876314163, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04953322559595108, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04263363033533096, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0373716875910759, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.036032989621162415, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.025157004594802856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02037862502038479, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.020043671131134033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016052138060331345, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015108115039765835, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014284188859164715, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014505891129374504, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012609818950295448, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010940616950392723, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014284188859164715, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014284188859164715, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1849566549062729, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17531929910182953, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17235466837882996, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15774016082286835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08647503703832626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08311206102371216, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0955437421798706, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08843230456113815, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08749210089445114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07903195172548294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07571839541196823, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04864196851849556, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.042327214032411575, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.041460905224084854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.041254300624132156, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024298470467329025, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021577054634690285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021518627181649208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020102031528949738, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019972167909145355, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012915349565446377, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013372120447456837, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012609012424945831, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009365212172269821, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012915349565446377, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012915349565446377, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22594743967056274, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2141462117433548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21062584221363068, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1928165853023529, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10538358986377716, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10127630829811096, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11634686589241028, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10772684961557388, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10661432147026062, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09630130976438522, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09215978533029556, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05908401310443878, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0513373427093029, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.050287552177906036, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05003756284713745, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029460938647389412, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02564465068280697, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02557315118610859, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023785077035427094, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023629754781723022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015334850177168846, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015099710784852505, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014960527420043945, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009599537588655949, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014960527420043945, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014960527420043945, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1938658505678177, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1782873570919037, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17231738567352295, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15649385750293732, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08731511980295181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08195147663354874, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0999002605676651, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09220743924379349, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0891367569565773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07832618057727814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0749327540397644, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05026519298553467, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.043711427599191666, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04158291220664978, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04107058793306351, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025095732882618904, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021409181877970695, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021223904564976692, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019650723785161972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01933039352297783, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013228539377450943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.0131630077958107, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01254719402641058, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008702125400304794, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013228539377450943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013228539377450943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06008918955922127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05629737302660942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05404270440340042, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.048960138112306595, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.027765600010752678, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02591848000884056, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03272687643766403, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03031923994421959, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0281723290681839, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02506156824529171, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02395959198474884, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01655871793627739, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01448710449039936, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013364367187023163, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013083791360259056, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008335251361131668, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0070216902531683445, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00688804080709815, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006493366323411465, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006321356166154146, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004489341750741005, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004579268861562014, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004140391945838928, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0032091212924569845, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01448710449039936, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01448710449039936, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05793483555316925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.054231781512498856, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.051646992564201355, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.046599939465522766, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0264881681650877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.024528419598937035, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03190835937857628, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0295703262090683, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02690434828400612, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02388366498053074, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022900929674506187, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016026102006435394, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014053941704332829, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012698845937848091, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012358746491372585, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008026035502552986, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00657414086163044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006410911213606596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0060584042221307755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005851499270647764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0042406534776091576, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0042225634679198265, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003839402226731181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002780640497803688, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014053941704332829, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014053941704332829, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20394136011600494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1918468475341797, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1879054605960846, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17053593695163727, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09474887698888779, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09030330181121826, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10662486404180527, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09755546599626541, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0959848165512085, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0855695977807045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08161385357379913, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05408179387450218, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0464663952589035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04513804242014885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04481298848986626, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026955628767609596, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022847244516015053, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022738641127943993, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020967476069927216, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02075440250337124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013797148130834103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01327083446085453, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013159320689737797, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008010337129235268, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013797148130834103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013797148130834103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18490807712078094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1444939225912094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.1298929750919342, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.1026386097073555, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0817830041050911, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06592823565006256, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10064365714788437, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09197137504816055, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08695381134748459, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0584794357419014, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.052907731384038925, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05156032741069794, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04457978531718254, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04000987112522125, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03886934369802475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.026262711733579636, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02200559712946415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.021690376102924347, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.017870714887976646, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.017100026831030846, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01508729811757803, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015686633065342903, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013677624054253101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012149684131145477, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013677624054253101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013677624054253101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1891244500875473, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17918150126934052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17614537477493286, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16122835874557495, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08836430311203003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08489990234375, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09756449609994888, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09036669135093689, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08940164744853973, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08073224127292633, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07730723172426224, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04967116564512253, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04323869198560715, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.042339641600847244, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042129505425691605, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024795865640044212, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021983463317155838, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02192537486553192, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020464476197957993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020333420485258102, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013136574998497963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013555283658206463, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012822987511754036, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00941178947687149, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013136574998497963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013136574998497963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2282034456729889, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21621078252792358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21257765591144562, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1945677548646927, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10635176301002502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10217571258544922, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11734525859355927, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10873904824256897, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10759802907705307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09711595624685287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09289867430925369, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0594777911901474, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05180688574910164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.050738196820020676, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.050481997430324554, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029659686610102654, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02582058683037758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025749946013092995, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023929275572299957, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023769337683916092, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015326171182096004, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01512826606631279, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014941050671041012, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009506876580417156, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014941050671041012, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014941050671041012, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19587503373622894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17941036820411682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17320945858955383, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15706562995910645, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08816485852003098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08255213499069214, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1009465903043747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09313008189201355, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09005340188741684, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07876049727201462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07527770102024078, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.050912268459796906, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04419417679309845, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04202468320727348, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04150577262043953, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025455035269260406, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021732758730649948, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021549902856349945, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019911034032702446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01958652213215828, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013557039201259613, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013504392467439175, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012868429534137249, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009103954769670963, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013557039201259613, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013557039201259613, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05470618978142738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05124077945947647, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04904867336153984, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.044355157762765884, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.025230685248970985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.023492099717259407, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.029878946021199226, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.027750007808208466, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0256081260740757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02275262214243412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02176046185195446, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.015078556723892689, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013222682289779186, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.012117337435483932, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01184652466326952, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007559313904494047, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006328887306153774, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006197084207087755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0058406805619597435, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005672771017998457, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004016526509076357, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004093714989721775, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0036789588630199432, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0028022625483572483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013222682289779186, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013222682289779186, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05671277269721031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.053045108914375305, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.050686728209257126, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04574890434741974, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.025951148942112923, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.024098120629787445, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03100007213652134, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.028697770088911057, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.026346910744905472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.023396026343107224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022427110001444817, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015620282851159573, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01367100328207016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012441931292414665, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012133859097957611, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00782947987318039, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006436631083488464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006284398026764393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005924707744270563, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005728657357394695, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004121788311749697, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004098831210285425, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0037426776252686977, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0026848327834159136, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01367100328207016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01367100328207016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.17721635103225708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.166612446308136, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1629067063331604, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1477971225976944, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08207125961780548, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.07808500528335571, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09270991384983063, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08500073105096817, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08319760113954544, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07413333654403687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0707341879606247, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04694065451622009, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0404496043920517, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03910904377698898, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03879465535283089, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.023390119895339012, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.01980944350361824, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.019686637446284294, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.018171939998865128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0179681945592165, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011985602788627148, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01154978945851326, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011392549611628056, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.006979946047067642, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011985602788627148, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011985602788627148, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1640869379043579, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13230286538600922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12070298194885254, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09584196656942368, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0725833922624588, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0607011578977108, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08899908512830734, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08142833411693573, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07681801915168762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05315664783120155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.049196235835552216, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.045622531324625015, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03939780592918396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.035447124391794205, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03447194769978523, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023088373243808746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01945553719997406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01916489750146866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015987470746040344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015332018956542015, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013052163645625114, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013826443813741207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011790493503212929, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.01069027278572321, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013052163645625114, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013052163645625114, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1871151626110077, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1771477907896042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1740758866071701, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15925702452659607, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08739904314279556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0839201956987381, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09667805582284927, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0894365906715393, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08842393010854721, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07978194952011108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07635404169559479, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04921276867389679, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.042838986963033676, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0419236496090889, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04171062260866165, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02460167184472084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02185031585395336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02179204858839512, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0203485656529665, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020217597484588623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0131275225430727, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013601412065327168, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01280883140861988, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009588700719177723, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0131275225430727, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.0131275225430727, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22179362177848816, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21000833809375763, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2064194679260254, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1887681484222412, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10332530736923218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09918597340583801, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11409535259008408, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10569652915000916, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10454915463924408, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09430054575204849, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09010910987854004, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0578574500977993, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05033810809254646, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04927362501621246, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04901833459734917, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02884374000132084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025066591799259186, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02499147690832615, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023210307583212852, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023052718490362167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014906371012330055, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01467167679220438, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014517863281071186, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009191110730171204, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014906371012330055, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014906371012330055, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18858563899993896, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17266522347927094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16673944890499115, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1506069302558899, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08495233207941055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07959067076444626, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09711533784866333, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08961781859397888, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08675786107778549, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07575248926877975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07214541733264923, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.049116190522909164, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04266377538442612, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.040637291967868805, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04015621170401573, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024623624980449677, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021315058693289757, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02114434912800789, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01957094296813011, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01926887221634388, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01333532389253378, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013641742058098316, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012698582373559475, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009706014767289162, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01333532389253378, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01333532389253378, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06393557786941528, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.059886131435632706, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05776514858007431, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0522715263068676, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02954396791756153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.027724670246243477, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03447660058736801, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.031786154955625534, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.029988303780555725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.026660043746232986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02554091438651085, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.017501790076494217, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015218554995954037, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014218918979167938, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013975883834064007, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008797600865364075, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007447903975844383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007338290102779865, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006881436798721552, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006735588889569044, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004694199189543724, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0047919671051204205, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004384084139019251, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003349552396684885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014218918979167938, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014218918979167938, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.059389956295490265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05542418360710144, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.052930232137441635, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04771065711975098, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.026979960501194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02505715936422348, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.032221753150224686, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02982804737985134, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.027396470308303833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.024256419390439987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02320875972509384, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016232894733548164, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014174271374940872, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012933792546391487, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01262182742357254, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008167309686541557, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00672132009640336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006566275376826525, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006191408261656761, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0059946016408503056, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004385047592222691, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004322321619838476, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004000411368906498, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0028979547787457705, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014174271374940872, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014174271374940872, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.18564479053020477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.17450538277626038, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.17085912823677063, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.15513157844543457, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08604566752910614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08205828815698624, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0968935564160347, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08876480907201767, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08723244071006775, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07782606780529022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07424045354127884, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.049032773822546005, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.042257603257894516, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0410330668091774, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.040718626230955124, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.024437202140688896, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.020738139748573303, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02064616233110428, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01903235912322998, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018848659470677376, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012491483241319656, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01202522125095129, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011941126547753811, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007203496526926756, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012491483241319656, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012491483241319656, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15924999117851257, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12614697217941284, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11423082649707794, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0850565955042839, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07026425004005432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05685662850737572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08685363829135895, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07919719815254211, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07496955990791321, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05011238157749176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04598294943571091, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.044737353920936584, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03834779933094978, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03432440012693405, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.033316876739263535, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02276395447552204, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.018771279603242874, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018510853871703148, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015159069560468197, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01447402872145176, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01289291214197874, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013305560685694218, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011534766294062138, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010194124653935432, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01447402872145176, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01447402872145176, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.16636191308498383, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.15751776099205017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.15480023622512817, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14160224795341492, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07761374861001968, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07454722374677658, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08573225885629654, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0794164314866066, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07853347063064575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07089672237634659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06787703186273575, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04368901997804642, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.038059648126363754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03725221008062363, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.037061307579278946, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02182392217218876, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.01947639137506485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.019424958154559135, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01815715990960598, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.018042000010609627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011644132435321808, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012211120687425137, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011365652084350586, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008705626241862774, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011644132435321808, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011644132435321808, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2035272866487503, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.19275064766407013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18948835134506226, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.173320934176445, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09464430063962936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0909128487110138, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10441417247056961, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09682419896125793, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09575772285461426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08642400056123734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08260675519704819, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05290805548429489, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04609556123614311, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04511766880750656, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04488498345017433, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02636183612048626, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022932223975658417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02286490425467491, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021244410425424576, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021098699420690536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013574239797890186, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013393914327025414, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013219417072832584, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008345477283000946, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013574239797890186, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013574239797890186, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18476979434490204, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1695510894060135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16399826109409332, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14805200695991516, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08326738327741623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07814135402441025, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09526646137237549, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08764553815126419, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0849824771285057, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07425374537706375, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07077225297689438, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04813621938228607, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04164745286107063, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.039749570190906525, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03930504247546196, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024136675521731377, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020684801042079926, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020527619868516922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.0189479049295187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018666621297597885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013043754734098911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013000844977796078, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01243768259882927, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009001334197819233, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013043754734098911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013043754734098911, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06962449848651886, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06514845043420792, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06306131184101105, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.057075224816799164, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.032129090279340744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03024214133620262, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.037013545632362366, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.034203801304101944, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03261049836874008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02894105389714241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.027628766372799873, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.018747758120298386, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.016339050605893135, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015417889691889286, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01519695669412613, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009388082660734653, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008038150146603584, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007939781062304974, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00740809366106987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007270092144608498, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004977575037628412, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0050980704836547375, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00467999093234539, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003525576088577509, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009388082660734653, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009388082660734653, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06501463055610657, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0606856569647789, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05829301476478577, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.052610982209444046, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02963443472981453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02765127457678318, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03470801189541817, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03221467509865761, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03007320500910282, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026669349521398544, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02546600252389908, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.017465293407440186, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015326700173318386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014180071651935577, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013900648802518845, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008738173171877861, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0073071313090622425, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007172521203756332, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0067161438055336475, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006534261628985405, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004591192118823528, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004587233532220125, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004233429208397865, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002995511516928673, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014180071651935577, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014180071651935577, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.18779273331165314, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.17635901272296906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.17280709743499756, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.15693694353103638, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08687445521354675, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08282779902219772, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09737678617238998, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08947496861219406, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08801287412643433, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07850123941898346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07477432489395142, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04920658469200134, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04255533590912819, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04138435050845146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.041082825511693954, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.024534644559025764, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02090083807706833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02081029675900936, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01916474476456642, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018979478627443314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012537356466054916, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012073989026248455, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012012857012450695, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007184482179582119, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012537356466054916, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012537356466054916, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17328843474388123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13441866636276245, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11990612000226974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09534308314323425, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07635834068059921, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06059320271015167, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09469455480575562, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08718153089284897, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0812566801905632, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05460425093770027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04986470565199852, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.048556942492723465, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.041944798082113266, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.037100233137607574, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03587719053030014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02449914626777172, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01998785510659218, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019598279148340225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015991684049367905, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01513594388961792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013646489940583706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013921534642577171, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012102864682674408, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010268690064549446, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013646489940583706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013646489940583706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17965386807918549, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17019185423851013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16727174818515778, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15300625562667847, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08390624821186066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0805991142988205, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09262476861476898, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08580425381660461, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08487723022699356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07663445174694061, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07337092608213425, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.047163937240839005, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04110327363014221, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.040248796343803406, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.040047768503427505, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023563038557767868, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021008051931858063, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020955221727490425, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019576743245124817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019451847299933434, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012548794969916344, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013110584579408169, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012250908650457859, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009289364330470562, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012548794969916344, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012548794969916344, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21732068061828613, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20588000118732452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20239053666591644, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18513871729373932, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10114997625350952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09716864675283432, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11158919334411621, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10341189056634903, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10233277827501297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09237462282180786, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08829941600561142, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05657011270523071, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04925085976719856, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04823137819766998, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04799014329910278, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028188161551952362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024532148614525795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024466481059789658, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022731048986315727, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022579092532396317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014555761590600014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014355538412928581, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014190929010510445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008995522744953632, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014555761590600014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014555761590600014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19648359715938568, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17935362458229065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1728978008031845, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15615981817245483, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08829644322395325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08241299539804459, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10164829343557358, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09349523484706879, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09028221666812897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07849200069904327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07488235831260681, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0511428602039814, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04439617320895195, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04215075820684433, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.041602250188589096, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025604788213968277, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02189052104949951, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021695485338568687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019999852403998375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0196576826274395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013643220067024231, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01374239381402731, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012908637523651123, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009411279112100601, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013643220067024231, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013643220067024231, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.058076657354831696, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.054410360753536224, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05222557485103607, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.047288089990615845, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0268770270049572, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02509034425020218, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03170650079846382, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.029354890808463097, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02728584036231041, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02426069974899292, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02320370078086853, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01606728881597519, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01405666209757328, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01297810859978199, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012709518894553185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008114851079881191, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00687575526535511, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006749871652573347, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006371011957526207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006209086161106825, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004428800195455551, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004555976018309593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004103361163288355, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003284136299043894, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01405666209757328, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01405666209757328, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.056660134345293045, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05299868434667587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05065741017460823, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04576771706342697, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.026008283719420433, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.024105606600642204, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.031203506514430046, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.028803326189517975, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02642037719488144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.023495499044656754, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022552574053406715, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01572270691394806, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01375108677893877, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012487800791859627, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012166288681328297, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007881329394876957, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006458127871155739, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006303395610302687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005950461141765118, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0057487343437969685, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004163159057497978, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004132439848035574, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0037767996545881033, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0027151526883244514, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01375108677893877, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01375108677893877, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20729169249534607, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19489534199237823, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1908784955739975, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.173356831073761, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09641432762145996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09199326485395432, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10834554582834244, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09930641949176788, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09774038940668106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08715339004993439, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08308590948581696, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.054888106882572174, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04729973152279854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04597010463476181, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04566922411322594, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027373116463422775, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023242341354489326, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02312609739601612, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02130638249218464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021106183528900146, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013972255401313305, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013412131927907467, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013370939530432224, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008002134039998055, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013972255401313305, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013972255401313305, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1252930760383606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09794484823942184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08817937225103378, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0678565725684166, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05451599881052971, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04402228072285652, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0687393993139267, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.062240682542324066, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05880366265773773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0387863963842392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03693745285272598, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03502337634563446, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03031907044351101, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.02692919597029686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.026069276034832, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01811297796666622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015231026336550713, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015021081082522869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012586656026542187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012030369602143764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.010718519799411297, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.011311031877994537, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00968874804675579, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009117289446294308, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012586656026542187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012586656026542187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18025292456150055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17066912353038788, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1677645444869995, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15342848002910614, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08423057198524475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08089540898799896, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09299834072589874, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08615849912166595, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08521778881549835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07688954472541809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07360923290252686, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04736391454935074, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04125301167368889, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0403907485306263, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04018629714846611, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023658111691474915, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021025219932198524, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020969541743397713, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019574085250496864, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019450102001428604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01257195696234703, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01304311491549015, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012271363288164139, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009143689647316933, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01257195696234703, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01257195696234703, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21941322088241577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2077825367450714, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20429527759552002, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18682704865932465, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10220944136381149, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09817894548177719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11283168941736221, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10451464354991913, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10342442244291306, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09330914169549942, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08919233828783035, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.057251229882240295, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04980393871665001, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.048761263489723206, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.048515308648347855, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02854367159307003, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024837858974933624, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024767110124230385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02301308512687683, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022857598960399628, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014806310646235943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014586611650884151, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014434481039643288, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009212595410645008, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014806310646235943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014806310646235943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19825401902198792, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1810304969549179, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17450955510139465, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1580103039741516, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0891178771853447, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08320817351341248, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.102530837059021, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09444629400968552, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09115497767925262, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07938338816165924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07581546902656555, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0516723096370697, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.044794511049985886, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04246815666556358, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.041917476803064346, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02589152567088604, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02193550392985344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02173953503370285, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02004360966384411, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01969391293823719, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013889268040657043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013613075949251652, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013153784908354282, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009112175554037094, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013889268040657043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013889268040657043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06687957793474197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06271670013666153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06071097031235695, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05501294881105423, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030960500240325928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.029168974608182907, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03564940392971039, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03301768749952316, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03141617774963379, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027955228462815285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.026672158390283585, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.018051370978355408, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015780504792928696, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014873459003865719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014654862694442272, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00904428493231535, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007769639603793621, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007670309394598007, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007176595740020275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007043058983981609, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004813195206224918, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004951837006956339, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004527572076767683, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00344637269154191, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014873459003865719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014873459003865719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06418979167938232, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0601879358291626, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.057823676615953445, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05230015143752098, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.029429970309138298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.027528932318091393, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.034612420946359634, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03203480318188667, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.029863225296139717, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026585839688777924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.025446485728025436, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.017454147338867188, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015255702659487724, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014122933149337769, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013841121457517147, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008745667524635792, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007315652444958687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007175070233643055, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006750162690877914, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006570653524249792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.00462465500459075, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004633226897567511, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004272382706403732, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0030851615592837334, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014122933149337769, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014122933149337769, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21047447621822357, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19803766906261444, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1939845234155655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17626458406448364, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09777367860078812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09328994154930115, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1095537319779396, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.1005520150065422, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09908972680568695, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08845924586057663, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08434433490037918, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05547008663415909, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04785597696900368, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04656770080327988, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04629524424672127, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02764839306473732, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023520484566688538, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02342110686004162, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02157568372786045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02138564921915531, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014118798077106476, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013555181212723255, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01354357972741127, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008052409626543522, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014118798077106476, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014118798077106476, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.20362195372581482, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.14068076014518738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11679958552122116, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09741885215044022, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08925895392894745, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06574837863445282, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11067888140678406, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10185175389051437, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09534915536642075, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05428262799978256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05520946905016899, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.056518517434597015, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04877932742238045, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.043125856667757034, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.04170820489525795, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.028330307453870773, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02287149243056774, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.022446945309638977, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016412725672125816, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01527278684079647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015364126302301884, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015512237325310707, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013489211909472942, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010957645252346992, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013489211909472942, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013489211909472942, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18728680908679962, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17735172808170319, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17432375252246857, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15941786766052246, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08763693273067474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08415073901414871, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09682653844356537, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08964209258556366, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08866911381483078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0800168514251709, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07660122960805893, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04940750449895859, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043024685233831406, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04213078320026398, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04191727563738823, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024713991209864616, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022097498178482056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022041520103812218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020609745755791664, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020479198545217514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285129331052303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013954167254269123, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012985266745090485, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010066554881632328, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285129331052303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013285129331052303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2250383496284485, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21315081417560577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2095291018486023, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19163016974925995, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10489136725664139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10073268413543701, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1157684400677681, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1072663813829422, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10613172501325607, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09573343396186829, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09151419252157211, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05872023105621338, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05111394822597504, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05004582181572914, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04979141801595688, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029279081150889397, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02548322267830372, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02541167102754116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023604659363627434, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023446135222911835, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015158392488956451, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014948749914765358, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014776987954974174, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009419587440788746, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014948749914765358, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014776987954974174, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20181339979171753, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18437746167182922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17759323120117188, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1608777642250061, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09076832979917526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08470728993415833, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10486199706792831, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0964062437415123, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09283585846424103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08094197511672974, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07732031494379044, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05273515731096268, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04577317833900452, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04332631081342697, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04273882880806923, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026367858052253723, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022498928010463715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02228500507771969, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020603930577635765, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02023141086101532, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013989697210490704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014137032441794872, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013176658190786839, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00965524185448885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013989697210490704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013989697210490704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07091169059276581, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06652526557445526, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06444551795721054, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05840181186795235, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03278684243559837, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.030940240249037743, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.037633348256349564, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03487114980816841, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03325400501489639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.029628101736307144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.028261730447411537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01905154250562191, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0166767630726099, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015765001997351646, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.015546930953860283, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009549125097692013, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008258680813014507, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008159385062754154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007638768292963505, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007502659689635038, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005087912082672119, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005289267748594284, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004800918512046337, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003722364315763116, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009549125097692013, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009549125097692013, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06738760322332382, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06313540041446686, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06084537133574486, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05501771718263626, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.030902789905667305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.028995897620916367, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.036144983023405075, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.033459823578596115, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03137736767530441, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.027936451137065887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02669602259993553, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018226273357868195, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015931254252791405, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014814047142863274, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014534170739352703, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009131687693297863, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007650013081729412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007517257239669561, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007053638342767954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006881531327962875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004800252616405487, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004797767382115126, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0044557941146194935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0031612126622349024, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014814047142863274, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014814047142863274, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21546603739261627, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20302458107471466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19914507865905762, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18123780190944672, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10038429498672485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09591636806726456, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11226731538772583, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10315603017807007, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10171469300985336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09097574651241302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08684283494949341, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05688611790537834, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04911305755376816, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04783400893211365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04754182696342468, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02835562452673912, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024172021076083183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024086972698569298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022210555151104927, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02201937697827816, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014493651688098907, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013938242569565773, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013924935832619667, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008306384086608887, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014493651688098907, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014493651688098907, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.20179222524166107, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1513420045375824, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.13104525208473206, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09795375168323517, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08799435943365097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06725269556045532, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11390119791030884, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10277923941612244, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09472211450338364, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05996960401535034, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.0560181625187397, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05804428085684776, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.05006777122616768, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.043464139103889465, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.041785746812820435, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.029799863696098328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02451428584754467, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.024006683379411697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.019774897024035454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.018652066588401794, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01730675809085369, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.018315047025680542, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.015244022943079472, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.014584098942577839, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.014584098942577839, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.014584098942577839, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19087789952754974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1807246208190918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1776122897863388, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16237719357013702, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08927057683467865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08572161197662354, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09867595881223679, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09133965522050858, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09032565355300903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08144834637641907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07794548571109772, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.050279922783374786, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043731383979320526, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.042814239859580994, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04260123148560524, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02513914555311203, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022294528782367706, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022232994437217712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020751507952809334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02061808854341507, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013413243927061558, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013839779421687126, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013097889721393585, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009713955223560333, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013413243927061558, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013413243927061558, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2286922037601471, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21652992069721222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21285097301006317, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19465459883213043, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10664176940917969, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10241004079580307, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11773043125867844, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10905518382787704, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1079075038433075, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0972927063703537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09298928081989288, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05977386236190796, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05198105797171593, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05089230090379715, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.050637565553188324, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029812417924404144, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025933530181646347, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02586367353796959, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024020275101065636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023857668042182922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01547832041978836, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015248563140630722, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015093735419213772, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009652452543377876, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009652452543377876, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009652452543377876, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20366474986076355, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18591971695423126, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1789184808731079, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16228312253952026, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09156013280153275, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08536867797374725, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10547558963298798, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09739342331886292, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09368257224559784, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08166784793138504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07805424183607101, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05315451696515083, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.046153612434864044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04362054541707039, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.043008290231227875, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026510702446103096, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02248254045844078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02225496806204319, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02055194415152073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020165113732218742, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013961522839963436, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013911424204707146, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01313832588493824, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009201752953231335, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013961522839963436, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013961522839963436, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05929437279701233, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05558764934539795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05360523238778114, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04856840521097183, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02743951790034771, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.025761675089597702, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.031978100538253784, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.029534269124269485, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02784489467740059, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02477506548166275, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.023706583306193352, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01617998629808426, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014135168865323067, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013212034478783607, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012988700531423092, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00812396127730608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0069378092885017395, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006834768690168858, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006417449098080397, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0062793949618935585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004346933681517839, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0044894772581756115, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0040671806782484055, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0031699342653155327, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014135168865323067, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014135168865323067, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05655626952648163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.052867576479911804, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05051209032535553, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04568567872047424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.025917749851942062, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.024065639823675156, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03118225187063217, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.028683412820100784, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.026323474943637848, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.023397820070385933, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022478412836790085, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0157431922852993, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013706126250326633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012492363341152668, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012190906330943108, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007906477898359299, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0065380302257835865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006394931580871344, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006048304494470358, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005860121455043554, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.00422070873901248, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004277566913515329, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0038472723681479692, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002951245289295912, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013706126250326633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013706126250326633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22187426686286926, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20865371823310852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20442838966846466, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18570111691951752, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10340038686990738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09864216297864914, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11578724533319473, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.1063530370593071, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.1048356369137764, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09338575601577759, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08899204432964325, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.058574117720127106, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05064096301794052, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.049309324473142624, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04898424819111824, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02920905500650406, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024902164936065674, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024797899648547173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022814005613327026, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02261793054640293, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014931282959878445, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014353268779814243, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014336680993437767, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008539831265807152, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014931282959878445, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014931282959878445, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.19244784116744995, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12289529293775558, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08703585714101791, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08535739034414291, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07747724652290344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04268559813499451, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11315197497606277, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10399403423070908, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09011152386665344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.054638415575027466, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05606881156563759, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.058032505214214325, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.05005117505788803, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03821425139904022, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03493591770529747, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02933802455663681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02142828144133091, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02047363482415676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01752668060362339, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015329222194850445, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01659020595252514, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.016816502436995506, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012942707166075706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012556308880448341, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012942707166075706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012942707166075706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1950685977935791, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18467135727405548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1815248280763626, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16592583060264587, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09134592115879059, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08769222348928452, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1009637638926506, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09343235939741135, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09242506325244904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08332473039627075, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0797525942325592, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.051483284682035446, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.044811636209487915, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.043866172432899475, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04364744573831558, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025736281648278236, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02293739840388298, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022876176983118057, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021359393373131752, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0212257020175457, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013758428394794464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01437282282859087, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013436286710202694, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010246532037854195, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013758428394794464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013758428394794464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23326948285102844, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22084301710128784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2170788049697876, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1983918994665146, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10882952809333801, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10450323671102524, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12027789652347565, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11131668835878372, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11013537645339966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09927250444889069, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09485600888729095, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.061037592589855194, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05306948348879814, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.051946572959423065, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05168649181723595, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030468037351965904, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026475440710783005, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026403840631246567, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024514028802514076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024348216131329536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015862388536334038, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015573297627270222, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01546194963157177, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009868316352367401, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009868316352367401, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009868316352367401, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20488232374191284, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18732021749019623, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18036037683486938, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16363197565078735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09215483069419861, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08603326976299286, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10623236000537872, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09804440289735794, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09428089112043381, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08235717564821243, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07872533053159714, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.053444623947143555, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04646020010113716, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.043913766741752625, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04330316185951233, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026648487895727158, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022649405524134636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022426672279834747, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02074240706861019, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020360996946692467, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013995269313454628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014035488478839397, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013164066709578037, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009320651181042194, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013995269313454628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013995269313454628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07276551425457001, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06825222820043564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06613550335168839, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05987473577260971, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.033718667924404144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.031790610402822495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03882870823144913, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.035826463252305984, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03420432284474373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.030433151870965958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.029011117294430733, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019642844796180725, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017177199944853783, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016231359913945198, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01599889062345028, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009911608882248402, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008528318256139755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008425721898674965, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007885914295911789, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007741614710539579, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005357261747121811, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005493832286447287, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005057784728705883, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0038988846354186535, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009911608882248402, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009911608882248402, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06710796803236008, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06288236379623413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06054328382015228, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.054627008736133575, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.030789829790592194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02887207269668579, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03599840775132179, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03329906612634659, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03124675527215004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.027749696746468544, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02654149755835533, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018165268003940582, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015839194878935814, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01475362665951252, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014486807398498058, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00909718219190836, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007607129868119955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007478822022676468, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006997418589890003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006828506011515856, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0047826338559389114, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004746004473417997, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.00445035332813859, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0031044406350702047, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01475362665951252, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01475362665951252, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21152697503566742, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19912837445735931, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1949833184480667, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1768951565027237, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09849906712770462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09396117925643921, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11025679111480713, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10145147889852524, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0998273491859436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08901824802160263, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0848284438252449, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05584220215678215, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.048338882625103, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04697888717055321, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04666990041732788, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027848074212670326, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023764314129948616, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023659512400627136, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021785978227853775, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021569686010479927, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014250397682189941, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013760248199105263, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013680687174201012, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00824218150228262, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014250397682189941, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014250397682189941, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.206449955701828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13464447855949402, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10444947332143784, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09715733677148819, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0883859395980835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.059377457946538925, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11563656479120255, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10555189102888107, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09650923311710358, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05583106353878975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05778432637453079, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05916261672973633, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0511658638715744, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04353783652186394, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.041560057550668716, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.03026527538895607, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.024274753406643867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.023695040494203568, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018755104392766953, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.017378143966197968, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.017479151487350464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.018037129193544388, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01519262045621872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013996380381286144, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013996380381286144, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013996380381286144, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1993885636329651, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1887332797050476, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18544232845306396, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1693568229675293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09337857365608215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08961009979248047, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10320762544870377, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09555232524871826, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09450723975896835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08514373749494553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08140895515680313, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05265000835061073, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0458202064037323, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.044854529201984406, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04462830349802971, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02630547061562538, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02346053533256054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.023393962532281876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0218364167958498, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021696696057915688, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014061507768929005, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014707470312714577, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013727315701544285, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010487742722034454, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014061507768929005, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014061507768929005, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.235928475856781, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2232978492975235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2194674015045166, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20042967796325684, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11011193692684174, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10569088160991669, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12159751355648041, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11263309419155121, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11144962906837463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10036243498325348, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09583467245101929, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.061713770031929016, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05368882045149803, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05255430191755295, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05228545889258385, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030778435990214348, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02677192911505699, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026697508990764618, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024775797501206398, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024601774290204048, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015958178788423538, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01573079824447632, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015548846684396267, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009932689368724823, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009932689368724823, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009932689368724823, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20855401456356049, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19016307592391968, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18290363252162933, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1660078763961792, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09369353950023651, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08728773891925812, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10805007815361023, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09975095838308334, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09592641890048981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08362186700105667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07997546344995499, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05443628132343292, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04727223142981529, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04464481398463249, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04401373490691185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.027207477018237114, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02301088348031044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022780882194638252, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.021046699956059456, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02064896933734417, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014428295195102692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01424570195376873, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013589933514595032, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009426126256585121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014428295195102692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014428295195102692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07246827334165573, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06801232695579529, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06581659615039825, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.059677496552467346, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03367021679878235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03169804811477661, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03898647055029869, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03593252971768379, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03414580225944519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.030398203060030937, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.029107339680194855, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019778000190854073, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01716529205441475, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016161521896719933, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01592286117374897, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009905445389449596, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008389366790652275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008280749432742596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007738793268799782, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007588513661175966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005244053900241852, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005274644121527672, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004925700835883617, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003573989262804389, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009905445389449596, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009905445389449596, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06832465529441833, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06400713324546814, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.061598822474479675, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.055683683604002, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03148174285888672, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.029497310519218445, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.036869116127491, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03410863131284714, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03195975720882416, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0283768679946661, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02712981216609478, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018598956987261772, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.016256282106041908, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015094206668436527, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014809847809374332, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009322958067059517, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007803896442055702, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007669748738408089, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007183600217103958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007009216584265232, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004917014390230179, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004904589150100946, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004560911562293768, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003240130841732025, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014809847809374332, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014809847809374332, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20880690217018127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19645121693611145, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1924688220024109, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17464879155158997, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0972319170832634, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09267988801002502, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10896867513656616, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10022927075624466, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09855781495571136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08788702636957169, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08369415998458862, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05515056848526001, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.047734733670949936, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04637369140982628, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.046036090701818466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02751012146472931, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02343808114528656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023320375010371208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021489016711711884, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021274177357554436, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014063636772334576, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013555321842432022, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013484722934663296, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008073067292571068, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014063636772334576, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014063636772334576, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17948582768440247, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1281835287809372, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10815092921257019, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09223784506320953, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.077492855489254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05647635459899902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09925208985805511, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09080573916435242, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08420149236917496, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05321443825960159, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.0515928789973259, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.050760187208652496, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04386984184384346, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03793907165527344, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03642653301358223, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.025685828179121017, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02096296101808548, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02054109238088131, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016725527122616768, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015684334561228752, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014446813613176346, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015288827940821648, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012547730468213558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011762239970266819, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014446813613176346, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014446813613176346, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19759662449359894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18693991005420685, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1836767941713333, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1677650511264801, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09245367348194122, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08873384445905685, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10222124308347702, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09460298717021942, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0935688391327858, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08426732569932938, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08053882420063019, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05206577107310295, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.045284196734428406, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0443175733089447, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04409165307879448, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.026012254878878593, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023016342893242836, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022950250655412674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02137916162610054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02124037966132164, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013816616497933865, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014190519228577614, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013479630462825298, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009851227514445782, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013816616497933865, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013816616497933865, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23446860909461975, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22183528542518616, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.218004509806633, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1991521418094635, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10940177738666534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10500898957252502, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12079151719808578, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11195620894432068, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11073480546474457, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09968728572130203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09516484290361404, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.061295442283153534, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053348347544670105, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052214283496141434, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.051944609731435776, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030547402799129486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02657572738826275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026498887687921524, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024582093581557274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02441110461950302, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015779882669448853, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015576092526316643, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015371563844382763, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009788124822080135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009788124822080135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009788124822080135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20365868508815765, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18587948381900787, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17888085544109344, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16229327023029327, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0915147215127945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08538156747817993, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10564154386520386, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09739702194929123, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09368298947811127, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0817507654428482, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07816317677497864, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05327605456113815, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04619121551513672, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04364922642707825, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04303373023867607, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02665615826845169, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02258681133389473, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022367160767316818, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020696409046649933, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020316684618592262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014199787750840187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014103290624916553, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013379151001572609, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009500252082943916, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014199787750840187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014199787750840187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07837328314781189, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07344787567853928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07129387557506561, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06454849988222122, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03628239408135414, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.034269869327545166, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.041430845856666565, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03835214674472809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.036818861961364746, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03271288424730301, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.031178893521428108, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02099550887942314, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.018320603296160698, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01740117184817791, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.017183499410748482, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010506775230169296, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00904228538274765, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00894985068589449, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008334104903042316, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008198103867471218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005551496520638466, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005675510969012976, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0052520339377224445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0038878791965544224, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010506775230169296, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010506775230169296, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07497747242450714, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.07005662471055984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0677601769566536, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06115903705358505, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.034319985657930374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03228824958205223, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03966318070888519, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.036629319190979004, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03482190892100334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03089047782123089, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02944260649383068, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.020026251673698425, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017458979040384293, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.016446543857455254, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016199305653572083, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100279301404953, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008515330962836742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0084046246483922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00783766433596611, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.00767797976732254, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005305902101099491, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005333404056727886, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004978252574801445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003581282217055559, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100279301404953, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100279301404953, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20023047924041748, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1881054788827896, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18435345590114594, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16713440418243408, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0927581936120987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08843861520290375, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10372224450111389, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09547627717256546, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09401112049818039, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08381221443414688, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07975121587514877, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05248597636818886, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.045472100377082825, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.044225797057151794, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.043923672288656235, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026171715930104256, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022379035130143166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02228357456624508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02051849476993084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02032807469367981, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013395778834819794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01299961470067501, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012871738523244858, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0078605180606246, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013395778834819794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013395778834819794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1840430051088333, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13689960539340973, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.11839260160923004, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08828587830066681, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07942764461040497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06087009608745575, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.102373406291008, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09340217709541321, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0863095372915268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05217757821083069, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.051117438822984695, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05229593440890312, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04494607076048851, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03861212730407715, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03698841482400894, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.026330484077334404, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.020811902359128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.020334230735898018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015872010961174965, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01467127539217472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014413760975003242, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014707212336361408, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012351308949291706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.01070279348641634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01467127539217472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01467127539217472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1890060156583786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17878952622413635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1756874918937683, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16052518784999847, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08838847279548645, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08484139293432236, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09767092764377594, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09045270830392838, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08944780379533768, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08056758344173431, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07701953500509262, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04978552460670471, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04332207888364792, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.042400553822517395, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04218468442559242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02486925572156906, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022084906697273254, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022025275975465775, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02053678035736084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02040102519094944, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013235573656857014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013720264658331871, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012909953482449055, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009642081335186958, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013235573656857014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013235573656857014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22849884629249573, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2162095010280609, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21245181560516357, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.194070503115654, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10655119270086288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.102248415350914, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11761055141687393, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10900615155696869, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10783984512090683, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09710979461669922, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09270484000444412, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.059653282165527344, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05194122716784477, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05083198845386505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.050571009516716, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029738202691078186, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025872547179460526, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025798380374908447, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023931005969643593, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023765496909618378, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015373552218079567, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015162522904574871, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014974933117628098, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009521456435322762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009521456435322762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009521456435322762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19960027933120728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18168485164642334, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17461220920085907, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1582910716533661, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08964692801237106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08336913585662842, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10381706804037094, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09558498859405518, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09181413054466248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07982365787029266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07627865672111511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.052313756197690964, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0453447625041008, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.042765986174345016, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04214687645435333, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026159372180700302, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02216286212205887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021935319527983665, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020261701196432114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019872382283210754, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013945437036454678, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013891165144741535, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013111090287566185, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009394264779984951, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013945437036454678, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013945437036454678, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06033003702759743, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.056543491780757904, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05438017472624779, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04927584156394005, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.027949940413236618, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.026146551594138145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03281804174184799, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.030334340408444405, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.028374293819069862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.025211453437805176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024106543511152267, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.016607144847512245, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014488661661744118, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013432110659778118, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013172926381230354, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00832190178334713, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0070070382207632065, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006884453818202019, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00646466389298439, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006305447779595852, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004408533219248056, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.00449302326887846, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004068170208483934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0030792783945798874, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014488661661744118, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014488661661744118, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05558149516582489, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05199693515896797, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.049532268196344376, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.044726453721523285, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.025465331971645355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.023537416011095047, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.030658463016152382, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02834298647940159, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02583792805671692, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.022940192371606827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02199927531182766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015418577939271927, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013506126590073109, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012208448722958565, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011888341046869755, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00772746279835701, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0063386401161551476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006176205817610025, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0058423676528036594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005636561196297407, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004090354312211275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00408282270655036, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003692418336868286, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002707210136577487, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013506126590073109, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013506126590073109, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19253729283809662, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18109279870986938, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.17722629010677338, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16103984415531158, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08953139185905457, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0853053629398346, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1007213443517685, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0924946591258049, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09077039361000061, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08100109547376633, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07714909315109253, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.051010631024837494, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.044038981199264526, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0426953062415123, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04239070042967796, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02542106993496418, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.021596839651465416, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.021480541676282883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01980346255004406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01960877701640129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013005816377699375, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012515380047261715, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012414692901074886, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007482085842639208, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013005816377699375, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013005816377699375, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15814070403575897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11017436534166336, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08596515655517578, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0804857537150383, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06659843772649765, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.044974178075790405, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0936410203576088, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08596126735210419, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07439244538545609, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04821913689374924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04859989136457443, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.048009712249040604, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.041352517902851105, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.032702766358852386, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.030336182564496994, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02415994554758072, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01807418465614319, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.017283398658037186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014841518364846706, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013245569542050362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013297501020133495, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013752175495028496, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010552976280450821, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010150805115699768, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014841518364846706, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014841518364846706, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1908692717552185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18052248656749725, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17736172676086426, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16203570365905762, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08932745456695557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0857042521238327, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09873943775892258, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09142212569713593, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09042596817016602, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08140035718679428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0777834802865982, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0503515750169754, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04380674287676811, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0428672730922699, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04264849051833153, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025154827162623405, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022362180054187775, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022301141172647476, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020799726247787476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02066430076956749, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013431237079203129, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01394631713628769, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013112549670040607, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009860903024673462, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013431237079203129, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013431237079203129, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23058569431304932, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2181081622838974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21430349349975586, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19576551020145416, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10753946006298065, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10319273918867111, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11873698234558105, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11004451662302017, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10886375606060028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0979858860373497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0934838056564331, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06022422015666962, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05242505297064781, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.051305387169122696, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05104316025972366, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03002607263624668, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026109013706445694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02603725716471672, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024148769676685333, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023982292041182518, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015531810000538826, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015300871804356575, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015129360370337963, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009607663378119469, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009607663378119469, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009607663378119469, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.2049466073513031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18667830526828766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17942354083061218, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16262586414813995, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09203869849443436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08566541969776154, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10633209347724915, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09816874563694, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0942583829164505, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08201304078102112, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07832677662372589, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05358874425292015, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04651939496397972, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04385921731591225, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04321587458252907, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02675645798444748, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02261955291032791, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02238176390528679, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02066606655716896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020264191552996635, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014164711348712444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014036988839507103, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013313757255673409, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00930488109588623, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014164711348712444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014164711348712444, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06942376494407654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06510065495967865, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06299698352813721, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.057057756930589676, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0321669764816761, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.030289694666862488, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.037110231816768646, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0343293733894825, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.032642021775245667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.029019853100180626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02770998887717724, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.018787335604429245, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0164081621915102, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015457329340279102, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.015228046104311943, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009415978565812111, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008068219758570194, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00796326994895935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007445484399795532, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0073016732931137085, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.00499542523175478, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005132873542606831, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004690777510404587, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003554902272298932, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009415978565812111, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009415978565812111, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06671242415904999, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06241153925657272, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0600578673183918, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.054251011461019516, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03057868778705597, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02862834744155407, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0358438566327095, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03313355892896652, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.031070327386260033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.027556162327528, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.026350369676947594, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018071547150611877, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015798499807715416, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014677202329039574, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01440335251390934, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009062262251973152, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0076032900251448154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0074706850573420525, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006997907999902964, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006829824298620224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004796377383172512, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004797189496457577, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.00445266580209136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0032114710193127394, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014677202329039574, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014677202329039574, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19901983439922333, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18709929287433624, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1831611543893814, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16629181802272797, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09252855181694031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08815664798021317, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10364556312561035, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09543315321207047, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0938115194439888, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08353668451309204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07958555221557617, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05248139053583145, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04543203115463257, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04411790892481804, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0438292920589447, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026172084733843803, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02232828363776207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022228248417377472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020475734025239944, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02028755284845829, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013394790701568127, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012966236099600792, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012850537896156311, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00782344862818718, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013394790701568127, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013394790701568127, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.19717995822429657, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1498522311449051, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.13043217360973358, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09750896692276001, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08689804375171661, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06793670356273651, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10994000732898712, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10106945037841797, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09281937032938004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.057149745523929596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.056016210466623306, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05654555931687355, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04902408644556999, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04262380301952362, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.041007205843925476, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.028679311275482178, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.023620475083589554, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.023101352155208588, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018470730632543564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.017335031181573868, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01605420932173729, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.017247024923563004, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014008655212819576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013369591906666756, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014008655212819576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014008655212819576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19541025161743164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18478547036647797, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1815013438463211, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16573038697242737, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09142033755779266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08770491182804108, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10104575753211975, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0935707837343216, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09254879504442215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08325957506895065, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07951328158378601, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.051405809819698334, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04476436972618103, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04380818083882332, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0435759574174881, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025668369606137276, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022703060880303383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022642359137535095, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02107449620962143, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020936019718647003, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013557475991547108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013948934152722359, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013220849446952343, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009610376320779324, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013557475991547108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013557475991547108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23522672057151794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2224673479795456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2185792773962021, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19957329332828522, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10981709510087967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10534936934709549, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12127530574798584, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11235476285219193, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11115293949842453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0999913290143013, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09539386630058289, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06151294708251953, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053546447306871414, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052405547350645065, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05213601142168045, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030682934448122978, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026681970804929733, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026606548577547073, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024670850485563278, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02449917420744896, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015888208523392677, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015659771859645844, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01548181101679802, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009857278317213058, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009857278317213058, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009857278317213058, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.21040600538253784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1919906735420227, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1845780611038208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16723321378231049, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09448708593845367, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08805464953184128, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10932762175798416, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.10080675035715103, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09676823765039444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08434099704027176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0805322527885437, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05497872456908226, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04772371053695679, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04499121010303497, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04433188587427139, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02747792750597, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0231370497494936, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022894268855452538, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02114974707365036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020735066384077072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014540012925863266, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014260372146964073, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013666993007063866, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009317317046225071, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014540012925863266, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014540012925863266, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07252055406570435, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06791622191667557, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06574784219264984, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05952205881476402, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.033609338104724884, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03164936602115631, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.038669705390930176, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.035803016275167465, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03413451090455055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.030287250876426697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.028898369520902634, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0196073055267334, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01712309569120407, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016147196292877197, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01591215655207634, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009832211770117283, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008419537916779518, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008317681029438972, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007762443274259567, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007615282665938139, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005227829795330763, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0053436411544680595, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004915292840451002, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0036922849249094725, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009832211770117283, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009832211770117283, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06506156176328659, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.060908492654561996, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.058569494634866714, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05292205139994621, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.029949743300676346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02800847217440605, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03509671241044998, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.032443705946207047, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03040812909603119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026952847838401794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.025768060237169266, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01769370213150978, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015441027469933033, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014353298582136631, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01408434472978115, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008875220082700253, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007413843646645546, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007286021485924721, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006813614163547754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006650816649198532, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004685754422098398, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004652073606848717, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004342576488852501, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0030609448440372944, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014353298582136631, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014353298582136631, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20764680206775665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19495844841003418, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19103050231933594, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17334075272083282, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09656219929456711, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09200390428304672, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10828900337219238, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09951738268136978, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09786268323659897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08717042952775955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0830170139670372, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05479364097118378, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04739319905638695, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04604313150048256, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04572497308254242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02733437344431877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023279471322894096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023181386291980743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021339179947972298, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02113083191215992, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013998833484947681, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013487688265740871, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013433994725346565, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008083820343017578, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013998833484947681, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013998833484947681, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18310004472732544, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12874150276184082, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10771186649799347, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08914966136217117, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08033294975757599, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.058360058814287186, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10069078207015991, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09227856993675232, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08622338622808456, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0507354773581028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05060349404811859, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0517529658973217, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.044732704758644104, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03938104584813118, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03803660720586777, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.026485316455364227, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021683985367417336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.021299317479133606, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016523223370313644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015557539649307728, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015318775549530983, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015619466081261635, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013680082745850086, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0120511120185256, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013680082745850086, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013680082745850086, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19431445002555847, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18376481533050537, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18051673471927643, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16479671001434326, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09095605462789536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08724227547645569, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10051625221967697, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0930931568145752, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09207186102867126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08280477672815323, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07910726964473724, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0512031614780426, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04459628090262413, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0436343252658844, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04341140761971474, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025594983249902725, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022726034745573997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022664103657007217, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021117350086569786, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020975951105356216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013616707175970078, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.0141183752566576, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013284157030284405, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00991469994187355, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013616707175970078, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013616707175970078, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23315173387527466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22048448026180267, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21664725244045258, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19778241217136383, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10883592814207077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10439649969339371, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12014099955558777, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1113666221499443, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11015147715806961, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09908849000930786, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09453638643026352, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06096164882183075, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05307181924581528, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05192721635103226, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05166199058294296, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030387410894036293, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026424754410982132, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026349499821662903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024422064423561096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024250036105513573, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015691706910729408, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015480258502066135, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015280998311936855, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009706031531095505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009706031531095505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009706031531095505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20178212225437164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18393516540527344, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17655649781227112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1598818302154541, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09082300215959549, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08440782129764557, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10563070327043533, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09728608280420303, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09303988516330719, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08096956461668015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07725974172353745, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05312643572688103, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04626277834177017, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.043453607708215714, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.042778655886650085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026502452790737152, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022718265652656555, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022462014108896255, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020833294838666916, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02041105180978775, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0140090212225914, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014533680863678455, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013102504424750805, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.010143624618649483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0140090212225914, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0140090212225914, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08111851662397385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07606387883424759, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07397835701704025, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0670526847243309, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03759278729557991, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03557765856385231, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04266185685992241, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03948010504245758, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03813767805695534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03390263393521309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03227539360523224, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.021591654047369957, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01883327215909958, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.018001001328229904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.017802396789193153, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010789807885885239, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00929399486631155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009212976321578026, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008552199229598045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008427614346146584, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005657557863742113, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005728738848119974, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005378391593694687, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0038297846913337708, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010789807885885239, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010789807885885239, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07553961873054504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.07068877667188644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06842934340238571, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06184147298336029, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03469177708029747, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03267841041088104, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0399719662964344, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.036940451711416245, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03520215302705765, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03124026209115982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029808340594172478, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.02013365924358368, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017563071101903915, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.016594426706433296, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016360919922590256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100815175101161, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008533223532140255, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008428803645074368, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007837996818125248, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007687119767069817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005295852664858103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005252294708043337, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004980083554983139, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0034255266655236483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100815175101161, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0100815175101161, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19991865754127502, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.187857523560524, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18397191166877747, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16685520112514496, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09256613254547119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08832986652851105, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1034088283777237, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09536769986152649, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0938301756978035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08358149975538254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07966534793376923, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05230535566806793, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04538024216890335, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04411124438047409, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04380922392010689, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02607918530702591, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022299541160464287, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022213727235794067, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020442945882678032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020256847143173218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013352317735552788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012911488302052021, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012844553217291832, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007749423384666443, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013352317735552788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013352317735552788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.19103582203388214, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.15441901981830597, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.14138847589492798, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10464978963136673, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.084275983273983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.07034917920827866, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.1025213748216629, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09414259344339371, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08923212438821793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.061108797788619995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.052950650453567505, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05240211635828018, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.045200102031230927, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04074255749583244, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03964032605290413, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.026294833049178123, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02182588167488575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02151290886104107, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.017539672553539276, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01676339842379093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014347675256431103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01491063553839922, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012860727496445179, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.01094270683825016, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014347675256431103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014347675256431103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19435879588127136, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18382501602172852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18058182299137115, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16488705575466156, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09098026156425476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08727938681840897, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10053715109825134, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09310559928417206, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09207960218191147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08285626024007797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07914208620786667, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05122247338294983, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.044617388397455215, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04366699978709221, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.043442804366350174, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025599148124456406, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022775966674089432, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022714868187904358, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02117169462144375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02103508450090885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013634312897920609, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014202425256371498, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013302719220519066, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010030945762991905, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013634312897920609, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013634312897920609, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23235179483890533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21976423263549805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21594980359077454, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1971147209405899, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10843318700790405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10403543710708618, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11976006627082825, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1109483391046524, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10976502299308777, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09874896705150604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09422220289707184, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06076695770025253, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.052881255745887756, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.051749929785728455, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05148051679134369, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030300913378596306, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026336124166846275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026261385530233383, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024348951876163483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02417873963713646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015671413391828537, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015433362685143948, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015267777256667614, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009689826518297195, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009689826518297195, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009689826518297195, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20623831450939178, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18813654780387878, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18076610565185547, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.163680762052536, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09274639189243317, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0863690972328186, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10742545127868652, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09905829280614853, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09497051686048508, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08270440995693207, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0789058655500412, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.054102323949337006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04701222851872444, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04427444189786911, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04362226277589798, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.027068492025136948, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022992946207523346, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02275042235851288, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02105775661766529, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020647410303354263, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01445509772747755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014489559456706047, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013591958209872246, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009875657968223095, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01445509772747755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01445509772747755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08589750528335571, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08041247725486755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07819072902202606, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07074934989213943, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03970537334680557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.037568915635347366, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.045279014855623245, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04172050952911377, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.04029875993728638, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.035775091499090195, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03413695842027664, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.022989479824900627, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.019970877096056938, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01908523216843605, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01887289062142372, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011530478484928608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009978965856134892, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009894111193716526, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009207584895193577, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009076030924916267, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006143241189420223, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0063241636380553246, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005850442685186863, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004441431257873774, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011530478484928608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011530478484928608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07430171966552734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06939702481031418, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06710134446620941, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.060518860816955566, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03392350673675537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03186842054128647, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03916936367750168, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03620864823460579, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03443511202931404, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.030484188348054886, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029021671041846275, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019704055041074753, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017213687300682068, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.016206033527851105, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015960989519953728, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009859497658908367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00831542257219553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008204901590943336, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007622772362083197, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007467659190297127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005157883279025555, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00510686868801713, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004827607423067093, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0032914995681494474, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009859497658908367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009859497658908367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.18735195696353912, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.175889790058136, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.17218242585659027, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.15591175854206085, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08656817674636841, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08249358832836151, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09686420112848282, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08932844549417496, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08784394711256027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07812480628490448, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07431643456220627, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.048979006707668304, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04250452667474747, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04124356433749199, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0409427210688591, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02441401034593582, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.020856797695159912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.020752867683768272, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.019101114943623543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01892288774251938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486107647418976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012081662192940712, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011997934430837631, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007252143230289221, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486107647418976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012486107647418976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.20268398523330688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.16766957938671112, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.15433955192565918, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.12284505367279053, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08993903547525406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.07589514553546906, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11103931069374084, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.10108897089958191, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09472973644733429, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.06927115470170975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.06144990026950836, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05685647949576378, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.048623114824295044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.043560564517974854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.04230606183409691, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.028611421585083008, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.023363450542092323, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02295515686273575, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01950383372604847, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.018655046820640564, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01571609266102314, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.016044415533542633, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014012590982019901, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011778820306062698, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014012590982019901, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014012590982019901, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1932436227798462, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18279299139976501, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1795474737882614, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16392982006072998, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09044717252254486, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08674094825983047, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0999390035867691, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09255444258451462, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0915309339761734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08234921097755432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07869353890419006, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0508841946721077, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04433905705809593, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04338950291275978, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04316287487745285, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025434473529458046, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022621557116508484, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022555014118552208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.021025080233812332, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020884795114398003, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013525635004043579, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014078164473176003, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013191812671720982, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009912911802530289, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013525635004043579, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013525635004043579, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2299308478832245, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21743682026863098, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21366360783576965, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1950712949037552, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10728199779987335, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1029287651181221, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11846272647380829, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10978040099143982, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10858608782291412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09772050380706787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09322429448366165, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06009840592741966, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05231568589806557, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05119844153523445, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05093010514974594, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029966501519083977, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026066923514008522, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025991350412368774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024101072922348976, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023933319374918938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015501146204769611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015289610251784325, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015102788805961609, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00962315034121275, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00962315034121275, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00962315034121275, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20404894649982452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18551942706108093, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1779705286026001, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1607978790998459, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09167049080133438, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08510228991508484, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10659193247556686, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09816422313451767, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09396769851446152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08143757283687592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07764380425214767, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.053724505007267, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.046594999730587006, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04377812519669533, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.043096210807561874, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026934580877423286, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022751251235604286, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022501477971673012, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02078143320977688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020357543602585793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014459794387221336, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014379826374351978, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013565556146204472, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00982982199639082, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014459794387221336, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014459794387221336, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08721435070037842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08171962946653366, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07955976575613022, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07204699516296387, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.04028144106268883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03818454593420029, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04551262408494949, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04216379672288895, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.040882211178541183, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03632909059524536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.034550417214632034, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.023050418123602867, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.020113520324230194, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.019279062747955322, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.019079234451055527, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011521010659635067, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00996442697942257, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009886196814477444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00917555671185255, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009049569256603718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006041640415787697, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006143749691545963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0057594566605985165, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004134842194616795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011521010659635067, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011521010659635067, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07938501238822937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.07418937981128693, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.07195048034191132, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06504174321889877, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.036288097500801086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03427023068070412, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.041508886963129044, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03841129690408707, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.036822497844696045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03268083557486534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.031093021854758263, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.020918356254696846, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01825306937098503, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.017328625544905663, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01710442267358303, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010442822240293026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008872298523783684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008775882422924042, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.008145860396325588, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.008005401119589806, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005449626129120588, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005395339801907539, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.005141599103808403, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0034627183340489864, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010442822240293026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010442822240293026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19584064185619354, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18398064374923706, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18018148839473724, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16342143714427948, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09051878750324249, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08631967753171921, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10106828808784485, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09324910491704941, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09177552163600922, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08180747181177139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07786346971988678, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05110766738653183, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04437320679426193, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.043138422071933746, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0428486205637455, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02548050880432129, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.021819256246089935, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.021720431745052338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020012354478240013, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.019828451797366142, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013048121705651283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012660670094192028, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012557370588183403, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007639335002750158, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013048121705651283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013048121705651283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.19086703658103943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.16495543718338013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.15513986349105835, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.11972518265247345, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08576638251543045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.07578199356794357, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10253535211086273, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09426341950893402, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08883605152368546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.06717216968536377, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.057064563035964966, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05245935916900635, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04534493386745453, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.041514161974191666, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.04056422784924507, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.026394033804535866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02229662798345089, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.021955374628305435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018825463950634003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01819806545972824, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014561466872692108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015192301943898201, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013342891819775105, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011308464221656322, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014561466872692108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014561466872692108, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1872723400592804, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17713622748851776, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1739978790283203, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15889713168144226, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08751531690359116, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0839603841304779, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09671688079833984, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08959266543388367, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08859068155288696, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07972923666238785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07617495954036713, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.049212053418159485, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.042912811040878296, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04199067875742912, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.041776880621910095, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02458055317401886, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021879971027374268, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021817542612552643, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020335499197244644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02020277827978134, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013028183951973915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.0136081138625741, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012705429457128048, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009570174850523472, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013028183951973915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013028183951973915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22069495916366577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20871849358081818, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20508961379528046, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18724815547466278, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10289120674133301, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09871888905763626, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11363454163074493, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10528848320245743, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10412504523992538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09371230751276016, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08943429589271545, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05763436481356621, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05015728622674942, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04907437786459923, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.048818185925483704, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028732450678944588, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024971581995487213, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024901464581489563, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023092027753591537, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022925790399312973, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01484570000320673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014630429446697235, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014454631134867668, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00916869193315506, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01484570000320673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01484570000320673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19230739772319794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17420190572738647, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16653479635715485, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15010178089141846, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08644150197505951, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07982122898101807, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10129707306623459, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09317929297685623, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08869369328022003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07641460001468658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07279153913259506, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05112217739224434, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04435371235013008, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04143144190311432, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.040722090750932693, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025664737448096275, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021810607984662056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021537361666560173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019910713657736778, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019466103985905647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013929273001849651, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014193341135978699, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012999281287193298, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.010094717144966125, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013929273001849651, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013929273001849651, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08866649121046066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0831691175699234, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08103832602500916, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0734621211886406, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.04097957909107208, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03888728842139244, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04655797779560089, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.042855504900217056, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.04158030450344086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03703794255852699, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03539736941456795, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0236218199133873, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.02049107290804386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.019671553745865822, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01947310008108616, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011833972297608852, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.010232207365334034, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.010155877098441124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009451435878872871, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009329966269433498, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006253935396671295, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.00639696978032589, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005979244597256184, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004421604331582785, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011833972297608852, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011833972297608852, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07189089059829712, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06724398583173752, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06504247337579727, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.058761678636074066, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03280578553676605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.030871745198965073, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03780246526002884, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.035048797726631165, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03329990431666374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.029593270272016525, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02816404588520527, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019045548513531685, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0166639257222414, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01567671447992325, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01543224323540926, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009532399475574493, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008060554973781109, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007953537628054619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0074178678914904594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007267405278980732, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005003390833735466, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0049798996187746525, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004686332307755947, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003251301823183894, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009532399475574493, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009532399475574493, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2007649838924408, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18883667886257172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18506275117397308, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16801995038986206, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0928645059466362, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08866047859191895, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10364081710577011, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09546192735433578, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09410861879587173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0839705765247345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08010122179985046, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05244390666484833, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.045406509190797806, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04419368878006935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0439319983124733, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026152167469263077, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02235576882958412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02227097377181053, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020537633448839188, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020359870046377182, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013379954732954502, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012932198122143745, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012880653142929077, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007774587254971266, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013379954732954502, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013379954732954502, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.18669231235980988, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.14516609907150269, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.13037645816802979, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.11312897503376007, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08183947950601578, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0636679157614708, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10011778771877289, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09222392737865448, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08694832026958466, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0614655427634716, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.056936196982860565, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05115897208452225, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.044236671179533005, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03954693302512169, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03838260844349861, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02569785714149475, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021107707172632217, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.020768366754055023, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.017295386642217636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.016481557860970497, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01406797394156456, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014401552267372608, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012524603866040707, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010414732620120049, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01406797394156456, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01406797394156456, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1905653029680252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18033549189567566, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17720234394073486, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16184818744659424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08897190541028976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08539127558469772, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09829210489988327, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09102120250463486, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09005668759346008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08109328895807266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0775497555732727, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.049998216331005096, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04357238858938217, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04264984279870987, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042434532195329666, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0249672532081604, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022188471630215645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022130530327558517, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0206303633749485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020495453849434853, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013225517235696316, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013751483522355556, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012907544150948524, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009614942595362663, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013225517235696316, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013225517235696316, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22174885869026184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20979033410549164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20618890225887299, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.188319593667984, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10332255810499191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0991898626089096, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11403575539588928, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1057015135884285, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10458340495824814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09418782591819763, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08991760015487671, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05785243958234787, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05036349967122078, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04930059611797333, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04904704540967941, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028837082907557487, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025092629715800285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025022268295288086, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023214293643832207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02305627427995205, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014906711876392365, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014711586758494377, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014529231004416943, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009257282130420208, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014906711876392365, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014711586758494377, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.40.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1935865730047226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17577241361141205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16848713159561157, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15197747945785522, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08708304911851883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08065904676914215, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10125932097434998, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09338021278381348, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08925816416740417, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07720490545034409, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07348047941923141, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05124003812670708, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04450475797057152, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04169970378279686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04102960601449013, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025718269869685173, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021902672946453094, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021654807031154633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02002405747771263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019618863239884377, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013952123001217842, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014162776991724968, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013087207451462746, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.010047771967947483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013952123001217842, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013952123001217842, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08355280756950378, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07827732712030411, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07615204155445099, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06899970769882202, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03856741264462471, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.036487918347120285, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.044116392731666565, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04054239019751549, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03913094103336334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03479594737291336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03328455984592438, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02235286496579647, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.019409485161304474, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01853148452937603, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01831655576825142, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011228455230593681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009694336913526058, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009605112485587597, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00895324070006609, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00882271584123373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.00600828044116497, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006151773501187563, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005715509876608849, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004327720031142235, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011228455230593681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011228455230593681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07078571617603302, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06603440642356873, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06367743760347366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.057468168437480927, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0322074331343174, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.030184431001544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.037392936646938324, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03459810093045235, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03271283581852913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.028917768970131874, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.027553759515285492, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018788734450936317, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0164579339325428, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015409136191010475, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015144106931984425, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009406767785549164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0079509187489748, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007832462899386883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007303809281438589, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007140509318560362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.00494796596467495, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004972558002918959, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0046076700091362, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0032988018356263638, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009406767785549164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009406767785549164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2009592354297638, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18909934163093567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18521198630332947, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16818207502365112, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09321065247058868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08895306289196014, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10407125949859619, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09599418938159943, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09454191476106644, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08433939516544342, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08029712736606598, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.052639733999967575, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04566757753491402, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.044447943568229675, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04414947330951691, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026246653869748116, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02246727980673313, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022373130545020103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02060975693166256, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020426204428076744, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01342141255736351, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01300206407904625, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012921431101858616, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0078092883341014385, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01342141255736351, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01342141255736351, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16430233418941498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13754808902740479, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.1273273080587387, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.10267900675535202, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07364783436059952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06293661147356033, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09050202369689941, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08167517930269241, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07652028650045395, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.056117139756679535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.050770290195941925, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04627623409032822, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03976789489388466, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03621900454163551, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03535144403576851, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023939816281199455, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.020251967012882233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01994229666888714, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01719394139945507, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01664213091135025, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014315016567707062, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014686308801174164, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013286439701914787, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011776924133300781, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014315016567707062, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014315016567707062, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18304088711738586, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.173246830701828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17022094130516052, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15557071566581726, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08540904521942139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08199533820152283, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09439576417207718, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08737114071846008, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08644531667232513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07789338380098343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07452180236577988, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04801756888628006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0418168380856514, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04094371199607849, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.040733128786087036, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023997697979211807, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021300677210092545, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02124178409576416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01981029286980629, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019683578982949257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012743135914206505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013198567554354668, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012440639548003674, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009232748299837112, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012743135914206505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012743135914206505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21908651292324066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.207347571849823, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20383208990097046, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18626080453395844, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10207910090684891, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09799899160861969, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11271180212497711, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10438389331102371, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10330101102590561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09308956563472748, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08892843872308731, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05719872564077377, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04976130276918411, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04872006177902222, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04847225174307823, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028543079271912575, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024858539924025536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024791410192847252, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02302020974457264, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02286534011363983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014853007160127163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014669226482510567, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014486854895949364, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009361788630485535, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014853007160127163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014853007160127163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.41.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19296535849571228, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17556393146514893, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16834419965744019, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15216605365276337, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08668936043977737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08042887598276138, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1010214313864708, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09306047111749649, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08887818455696106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07706183195114136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0735025554895401, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0509747639298439, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04422599822282791, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04143815115094185, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.040765430778265, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02548411302268505, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021578853949904442, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02132715843617916, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019722899422049522, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019305892288684845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013552043586969376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013717014342546463, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012641126289963722, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009429384022951126, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013552043586969376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013552043586969376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08800995349884033, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08264689147472382, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08053953945636749, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07309577614068985, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.040651723742485046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0385909378528595, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.046178191900253296, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04251328855752945, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.041231267154216766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03676993399858475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03517426550388336, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.023395320400595665, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.020311374217271805, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.019485484808683395, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01928700879216194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011714423075318336, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01010170578956604, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.010022168047726154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009329947642982006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009207089431583881, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006181205622851849, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006268888246268034, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005906431004405022, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00427060155197978, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011714423075318336, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011714423075318336, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07514926791191101, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.070276640355587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06799732148647308, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.061464834958314896, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03423804044723511, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03222634270787239, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.039539240300655365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.036471422761678696, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03475894033908844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03087168000638485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029416106641292572, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019911523908376694, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01736457832157612, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01637081615626812, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016125567257404327, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009961407631635666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008442304097115993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008333876729011536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007772576063871384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007615749724209309, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005234450101852417, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00524152722209692, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004903524648398161, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0034765296149998903, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009961407631635666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009961407631635666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20635826885700226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1943143755197525, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19047728180885315, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17316438257694244, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09566853195428848, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09141078591346741, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10662902146577835, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09834937751293182, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09693767130374908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0866953507065773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08255548775196075, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05389512702822685, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04682441055774689, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.045614827424287796, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04529787227511406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026883190497756004, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023063715547323227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022971337661147118, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021210579201579094, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02102254517376423, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013773839920759201, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013355517759919167, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01329297199845314, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008083412423729897, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013773839920759201, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013773839920759201, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16642576456069946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13263434171676636, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12100644409656525, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09342426806688309, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07276307046413422, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.059494923800230026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09125320613384247, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08124922215938568, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07715029269456863, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.053953636437654495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.048227548599243164, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04648534208536148, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03940534591674805, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03558768332004547, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03464045003056526, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02441319264471531, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019678624346852303, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019421076402068138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0163466427475214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.015720047056674957, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015031646937131882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014092054218053818, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013919904828071594, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011046932078897953, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014092054218053818, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.014092054218053818, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1826058328151703, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17293106019496918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16995824873447418, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15540984272956848, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08517295867204666, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08180267363786697, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09432689100503922, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08712221682071686, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08619888871908188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07774136960506439, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07448214292526245, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04803420230746269, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04169753938913345, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04082232713699341, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04061782360076904, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024023042991757393, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021232394501566887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02117125876247883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019752105697989464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019628364592790604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01286324579268694, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013135993853211403, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01256027165800333, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009174525737762451, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01286324579268694, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01286324579268694, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22041943669319153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20877614617347717, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2052278220653534, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1876964122056961, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10269122570753098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09863433986902237, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1134718805551529, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10499953478574753, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10389840602874756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09372168034315109, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08965542912483215, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05766169726848602, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05005979537963867, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.049019504338502884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04877253994345665, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0287780724465847, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025034762918949127, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024967189878225327, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023204486817121506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023050257936120033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015047181397676468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01480101142078638, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01468030083924532, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009487364441156387, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01480101142078638, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01468030083924532, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.42.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.187996968626976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17075186967849731, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1632644087076187, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14794912934303284, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08428903669118881, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07790694385766983, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0990196242928505, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0912974625825882, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08660341799259186, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0750487893819809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07176937907934189, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04998145252466202, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04333188757300377, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04025982320308685, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03951407968997955, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024964677169919014, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02088923379778862, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020602095872163773, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019096193835139275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0186314657330513, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013250892050564289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013239526189863682, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012254013679921627, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008931211195886135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013250892050564289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013250892050564289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07664553076028824, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07187844067811966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06977716088294983, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06329670548439026, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03528113290667534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.033344365656375885, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04046257212758064, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03733249008655548, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03579428791999817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03190915286540985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03052423894405365, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.020468931645154953, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01783984899520874, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016938557848334312, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01671980507671833, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01028062216937542, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008851252496242523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00875691045075655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008185862563550472, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00805297214537859, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0055021243169903755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005620789248496294, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005211549811065197, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0039310334250330925, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01028062216937542, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01028062216937542, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06689906865358353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0625515952706337, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06020738556981087, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.054431308060884476, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.030436251312494278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02847800776362419, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.035631000995635986, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.032898131757974625, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.030876807868480682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0274544395506382, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.026226777583360672, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.017901094630360603, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015657536685466766, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014530918560922146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014268371276557446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008958788588643074, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00750218378379941, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007376295048743486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006910962052643299, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006744071375578642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004729064181447029, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004707839339971542, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.00437506940215826, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0031052911654114723, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014530918560922146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014530918560922146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.1820281744003296, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1711445152759552, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.16775009036064148, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1522625833749771, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08384767174720764, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08002018928527832, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09378261864185333, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08634138852357864, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08498101681470871, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07591236382722855, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07233385741710663, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.04740222543478012, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04105716571211815, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.03991464152932167, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.03963439166545868, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.023602396249771118, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02018149569630623, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.020089326426386833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01853877864778042, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018367119133472443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012089567258954048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.011699591763317585, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011627551168203354, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0070478967390954494, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012089567258954048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012089567258954048, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17097170650959015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1358651965856552, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12106379866600037, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0937676653265953, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07554875314235687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.060768477618694305, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09539824724197388, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08766529709100723, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07976114004850388, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05456181615591049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.049780480563640594, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.048730235546827316, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04206645116209984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03658285737037659, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.035202592611312866, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.024441640824079514, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019623467698693275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019102269783616066, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015740493312478065, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014765145257115364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013265720568597317, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013670687563717365, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011474358849227428, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009881922043859959, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014765145257115364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014765145257115364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1798507124185562, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17036962509155273, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16748759150505066, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15315794944763184, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0838397964835167, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08053334057331085, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09262260794639587, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08573539555072784, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08483182638883591, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07655280828475952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07329805195331573, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04706830531358719, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04098498076200485, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04013295844197273, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0399308055639267, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02349868416786194, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.0207822322845459, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02072499878704548, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01932472549378872, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0191990714520216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012397964484989643, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012726746499538422, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01209927350282669, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008731450885534286, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012397964484989643, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012397964484989643, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21887068450450897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20734578371047974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2038450986146927, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18645936250686646, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10191452503204346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09790202975273132, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11250139027833939, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10418570786714554, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10312285274267197, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09308198094367981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08900792896747589, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.057046156376600266, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04962741583585739, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04860229790210724, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.048365943133831024, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.028451986610889435, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02475300431251526, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02469130977988243, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022943610325455666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022791100665926933, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014743901789188385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014525612816214561, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014383621513843536, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.0091655682772398, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014743901789188385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014743901789188385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.43.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1885816752910614, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17141065001487732, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1639731526374817, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1486707180738449, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08460131287574768, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0782487690448761, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0994090810418129, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09152057766914368, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08691226691007614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07540856301784515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07216822355985641, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05018217861652374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.043511487543582916, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04047596827149391, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.039741016924381256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025167938321828842, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02113397605717182, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02085256576538086, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019365785643458366, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018909206613898277, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013530111871659756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013552778400480747, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012557199224829674, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009362971410155296, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013530111871659756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013530111871659756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0942288488149643, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08852443844079971, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.086294986307621, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07838848233222961, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.04342019185423851, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04123774543404579, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.049261145293712616, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04534786939620972, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.04401027038693428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0393233597278595, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03764944151043892, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.024943619966506958, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.02166321873664856, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.020803600549697876, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02059810422360897, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.012489541433751583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.010809496976435184, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.010726416483521461, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.010000335052609444, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009872461669147015, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006613399367779493, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006742296274751425, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.006326793693006039, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004643639083951712, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.012489541433751583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.012489541433751583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07527513802051544, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.07046807557344437, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0681450366973877, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.061700329184532166, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03419435769319534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0321769043803215, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.039604783058166504, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0365859717130661, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.034707341343164444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.030893508344888687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029571237042546272, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01992517150938511, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017400002107024193, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0163224246352911, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016074858605861664, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009964218363165855, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008382598869502544, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008266625925898552, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007726799696683884, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007564013358205557, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005220044404268265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005171438679099083, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004871731158345938, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0033552879467606544, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009964218363165855, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009964218363165855, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2034580111503601, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1914771944284439, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18781819939613342, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17079006135463715, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09376271814107895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08962829411029816, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10467827320098877, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0963805615901947, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09497509151697159, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08506813645362854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08121675252914429, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.052898161113262177, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04582604393362999, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.044609878212213516, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04432607442140579, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026351748034358025, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02257208526134491, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022486818954348564, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02078086882829666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020595449954271317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013496502302587032, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01306550670415163, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013004442676901817, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007877550087869167, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013496502302587032, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013496502302587032, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17258888483047485, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1486697643995285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.14108015596866608, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.11592955142259598, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0769059881567955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06828226149082184, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09131976962089539, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08185930550098419, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07950635254383087, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.061344537883996964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05597379803657532, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04656378924846649, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.039956770837306976, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03778949752449989, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03726259991526604, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.024457769468426704, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0211967583745718, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02106623165309429, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018467778339982033, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01814476028084755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015099397860467434, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015222045592963696, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014462728053331375, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012437965720891953, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014462728053331375, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.014462728053331375, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17217902839183807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16313299536705017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16034023463726044, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14657166600227356, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08018086850643158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07700575888156891, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08866432309150696, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08201450109481812, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08112501353025436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07319848984479904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07008731365203857, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.045080091804265976, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.039172571152448654, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.038349252194166183, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.038154542446136475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02252814546227455, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.019818810746073723, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.019762808457016945, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.018423117697238922, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01830318011343479, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011940323747694492, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012086004950106144, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011653218418359756, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008226785808801651, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011940323747694492, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011940323747694492, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.21947456896305084, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2079787403345108, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20446081459522247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18706439435482025, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10218773037195206, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09818197786808014, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11284810304641724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10448072105646133, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10337508469820023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09333857148885727, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08931420743465424, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.057299502193927765, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04978446662425995, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04874644801020622, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04850035533308983, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02856934815645218, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024854665622115135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.024783506989479065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02304595708847046, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022894252091646194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014864906668663025, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014640239998698235, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014499017037451267, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009311427362263203, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014864906668663025, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014864906668663025, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.44.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1840309351682663, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16846546530723572, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1614300161600113, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14660324156284332, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0827748253941536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07688949257135391, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0969744473695755, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08960375934839249, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08486142009496689, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07421119511127472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07105042040348053, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04888394847512245, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04251132160425186, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03950515016913414, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03877503424882889, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02438129298388958, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020425351336598396, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020131569355726242, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.018750159069895744, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0182924997061491, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012798032723367214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012843410484492779, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011822051368653774, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008525855839252472, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012798032723367214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012798032723367214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08095015585422516, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07602235674858093, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07393291592597961, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06713538616895676, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.037367675453424454, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03539453446865082, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04261211305856705, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03935862332582474, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03789186850190163, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03384019434452057, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03238743171095848, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.021585095673799515, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.018812421709299088, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01793013885617256, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.017718760296702385, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010810445062816143, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00934370793402195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009254726581275463, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008648982271552086, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00851667020469904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005738103296607733, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005894019268453121, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005453797522932291, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004091317765414715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010810445062816143, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010810445062816143, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07184670120477676, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06728392094373703, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06489649415016174, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05880805850028992, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03280593082308769, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.030817359685897827, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0385180301964283, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.035290010273456573, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.033271294087171555, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02965630404651165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.028444163501262665, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019426681101322174, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.016793373972177505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015701990574598312, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015435622073709965, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009741682559251785, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008109074085950851, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007984164170920849, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007484402507543564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.00731482682749629, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005152361933141947, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005068901926279068, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004785333294421434, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003365947399288416, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009741682559251785, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009741682559251785, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2139168083667755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2015426754951477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19753038883209229, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17991556227207184, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09924881160259247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09481067955493927, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11055971682071686, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10198789834976196, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10053782165050507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09002316743135452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08583030104637146, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05595649033784866, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04854776710271835, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04730075225234032, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04698219895362854, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027887312695384026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023935379460453987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023831922560930252, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02203010395169258, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021836135536432266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01428636908531189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01387761440128088, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013773499988019466, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008389532566070557, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01428636908531189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01428636908531189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16469433903694153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12283900380134583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10849953442811966, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08488184958696365, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07064314186573029, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05519063398241997, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08974207937717438, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08042433112859726, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07628992944955826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0479210764169693, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04551472142338753, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04558385908603668, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03880786523222923, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03448678180575371, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03339272364974022, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023546073585748672, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.018959563225507736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018701674416661263, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014968463219702244, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014204693958163261, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0140421437099576, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013563825748860836, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012717081233859062, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010454514995217323, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014204693958163261, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014204693958163261, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17544159293174744, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16623710095882416, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16341634094715118, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14949670433998108, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08168989419937134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07847557961940765, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09037027508020401, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08354710042476654, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08265025168657303, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07459389418363571, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07146459072828293, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04593581333756447, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.039894960820674896, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.039061129093170166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03886359557509422, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.022954434156417847, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020161056891083717, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020105542615056038, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.0187417920678854, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01861654594540596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01216183602809906, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01225181296467781, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011873634532094002, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008290684781968594, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01216183602809906, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01216183602809906, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.222603902220726, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21094879508018494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20745672285556793, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1898616999387741, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10363428294658661, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09956876188516617, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11447270214557648, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10596392303705215, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10484933108091354, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09470177441835403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0906682014465332, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05814334750175476, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05049639195203781, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04944831505417824, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04920355603098869, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029003888368606567, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025222647935152054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025153404101729393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02339959144592285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023242121562361717, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015134832821786404, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01487800944596529, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014771134592592716, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009487743489444256, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01487800944596529, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014771134592592716, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.45.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18817563354969025, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.173165962100029, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1662004292011261, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1510452926158905, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08480383455753326, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0790836364030838, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09930293262004852, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09161726385354996, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08674807101488113, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07629182934761047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07313601672649384, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05003957077860832, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04345202445983887, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.040473565459251404, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03975072503089905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02497333660721779, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020924575626850128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020624646916985512, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01925974152982235, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018809810280799866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013160072267055511, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013135937042534351, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012194337323307991, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008728212676942348, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013160072267055511, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013160072267055511, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07579607516527176, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07101798057556152, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06878969073295593, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06237327679991722, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.034907419234514236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.032900355756282806, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.040128082036972046, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.037139322608709335, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.035419635474681854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.031516119837760925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030120881274342537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.020303264260292053, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017715541645884514, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016725488007068634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01648910529911518, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010161211714148521, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008683335967361927, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008579280227422714, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008012617006897926, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007865102961659431, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005367640871554613, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005460161250084639, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005048678256571293, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0037171354051679373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010161211714148521, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010161211714148521, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06639812886714935, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06213817372918129, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05955207347869873, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05391828715801239, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.030235784128308296, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.028164898976683617, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.036094967275857925, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.033208876848220825, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03069061040878296, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02731928415596485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.026270221918821335, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018171343952417374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015772096812725067, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014477269724011421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014141643419861794, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009107238613069057, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007465837523341179, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007310144603252411, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006883352063596249, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006684860680252314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004795108921825886, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0047179521061480045, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004386639688163996, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0030893152579665184, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014477269724011421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014477269724011421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20158065855503082, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1895001381635666, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18575677275657654, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1687917560338974, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09282620251178741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0885845497250557, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10380684584379196, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.0954657644033432, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09407687932252884, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0840064063668251, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08010461926460266, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05234955623745918, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04534981772303581, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04414422810077667, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04385468363761902, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02608492225408554, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02227208949625492, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02218630723655224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020446274429559708, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020273441448807716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013329963199794292, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012833797372877598, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012813950888812542, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007608511485159397, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013329963199794292, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013329963199794292, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.16624295711517334, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13778705894947052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12429159134626389, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09684034436941147, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07159276306629181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.059771712869405746, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09656889736652374, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08638495206832886, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07705706357955933, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.057481661438941956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05008164048194885, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.048862408846616745, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.042030028998851776, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03549555316567421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03379523381590843, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.025249844416975975, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.02031903713941574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019749239087104797, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01793275773525238, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.016921162605285645, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015056721866130829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01561831682920456, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013218231499195099, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012568823993206024, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013218231499195099, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.013218231499195099, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17924651503562927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16981856524944305, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16695506870746613, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15269778668880463, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08347853273153305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08019015938043594, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09229321032762527, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.085387222468853, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0844687670469284, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07622166723012924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07292476296424866, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.046852290630340576, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04076122120022774, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03990483656525612, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.039701662957668304, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0233917236328125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020554661750793457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02049979381263256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019094238057732582, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01896827109158039, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012300551868975163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012433519586920738, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012004926800727844, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008335848338901997, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012300551868975163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012300551868975163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22678673267364502, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2148928940296173, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21129213273525238, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1933436095714569, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10563149303197861, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10147728770971298, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11661264300346375, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10798093676567078, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10687107592821121, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09649131447076797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0923161581158638, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05916491895914078, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.051463767886161804, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05039385333657265, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05013386532664299, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029507562518119812, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02570304088294506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025630434975028038, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02383219636976719, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0236732829362154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015324699692428112, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01513984426856041, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014946839772164822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009633157402276993, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014946839772164822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014946839772164822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.46.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18846827745437622, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17428120970726013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16765399277210236, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1526394486427307, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08518023043870926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07974918931722641, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09924591332674026, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09165666252374649, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08696576207876205, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07698272168636322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0737382024526596, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.049901410937309265, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04343530908226967, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.0406026691198349, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03991570696234703, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024903258308768272, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0208697821944952, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02057952992618084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019245048984885216, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01881631650030613, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013031996786594391, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012909244745969772, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012114636600017548, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008369614370167255, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013031996786594391, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013031996786594391, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08125334978103638, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07650873810052872, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07425146549940109, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06757016479969025, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03759303316473961, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03555571660399437, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04335469752550125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03992139920592308, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03810172155499458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03415018692612648, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.032817292958498, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.021962907165288925, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.019107641652226448, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.018072130158543587, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.017820211127400398, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011072397232055664, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009474785067141056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009362905286252499, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008805417455732822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008649047464132309, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.00599241815507412, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006072156131267548, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005670810118317604, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004284881055355072, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011072397232055664, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011072397232055664, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07392744719982147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06945095211267471, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06689117103815079, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06073358654975891, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03385956957936287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03178608417510986, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.039645835757255554, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03657693415880203, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.034312255680561066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.030653217807412148, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029409809038043022, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019905628636479378, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01741345226764679, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.016192035749554634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015891075134277344, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009969704784452915, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008347188122570515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008206512778997421, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007711490150541067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007525989320129156, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005257627461105585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005212532822042704, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004883121233433485, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003417644649744034, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009969704784452915, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009969704784452915, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22036534547805786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20817860960960388, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20437407493591309, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1865357905626297, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10250570625066757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09815512597560883, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1143004298210144, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10529249906539917, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10385521501302719, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09330727159976959, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0892111212015152, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05788138881325722, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.050136446952819824, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04887261241674423, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04857974871993065, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02887299470603466, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0247246902436018, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024632276967167854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022822996601462364, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02262822538614273, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01478183176368475, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01431155577301979, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014250753447413445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008634897880256176, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01478183176368475, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01478183176368475, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17007549107074738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13485896587371826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12206613272428513, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09584670513868332, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07411880046129227, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06101616844534874, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09408554434776306, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08390672504901886, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07854042202234268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05313008651137352, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05016718804836273, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04770686477422714, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.041258957237005234, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03695055842399597, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03590375557541847, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0252822358161211, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.021448520943522453, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.021150492131710052, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.018040908500552177, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.017373347654938698, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01587272435426712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.016325192525982857, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01475097518414259, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013597451150417328, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01475097518414259, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01475097518414259, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17620034515857697, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16698628664016724, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1641460806131363, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1502387374639511, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08206618577241898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07883668690919876, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09072960168123245, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08393192291259766, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08303101360797882, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07497687637805939, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07179728895425797, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.046072494238615036, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04004999250173569, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03921134024858475, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03901667520403862, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023010289296507835, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020181143656373024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02012723684310913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.018749959766864777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.018626844510436058, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012112678028643131, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012176653370261192, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011821532621979713, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00813101977109909, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012112678028643131, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012112678028643131, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22373336553573608, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21209721267223358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2085791975259781, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19097451865673065, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10418877005577087, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10014787316322327, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11509321630001068, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10655350238084793, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10542252659797668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0952536091208458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0911891832947731, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.058413971215486526, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.050765424966812134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04970845952630043, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0494634285569191, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029141513630747795, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02533940225839615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025269459933042526, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023504696786403656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02334989234805107, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015166080556809902, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014910195022821426, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014791865833103657, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009467345662415028, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014910195022821426, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014791865833103657, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.47.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18600338697433472, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17174886167049408, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16496704518795013, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1501021385192871, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08401777595281601, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07850931584835052, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09847612679004669, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09068204462528229, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08581674844026566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07578723877668381, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07262695580720901, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04952482879161835, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04304774850606918, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.040145792067050934, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03944263607263565, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024875091388821602, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020809577777981758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020514236763119698, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01920197531580925, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018766429275274277, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013314571231603622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013130294159054756, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012395892292261124, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008831454440951347, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013314571231603622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013314571231603622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0713094174861908, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06693263351917267, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06461839377880096, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05870971083641052, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.032950520515441895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.030962754040956497, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.038474515080451965, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.035387635231018066, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03343770280480385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.029823612421751022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02863793447613716, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019480308517813683, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01695270463824272, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015843702480196953, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.015579094178974628, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009803446009755135, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008298327215015888, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008177061565220356, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007686960510909557, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007522225379943848, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005275362636893988, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005343255586922169, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004932526499032974, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0037374338135123253, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009803446009755135, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009803446009755135, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.060790300369262695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.056913260370492935, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.054361771792173386, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04919637367129326, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02770245261490345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02568802610039711, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03330128267407417, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.030700961127877235, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02809511125087738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0250443946570158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02403341419994831, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016734400764107704, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014615343883633614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013264330103993416, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01293344795703888, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00838440377265215, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006866165902465582, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006705137901008129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006341379135847092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006136559881269932, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004433371126651764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004393834620714188, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004018913023173809, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00290200999006629, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014615343883633614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014615343883633614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2083044946193695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1958768367767334, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1918526291847229, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17443493008613586, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09625447541475296, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09183648973703384, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10775361210107803, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09901861846446991, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09754986315965652, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08711293339729309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08311916887760162, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.054476119577884674, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.047112300992012024, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04581896588206291, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.045536234974861145, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027146989479660988, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02319292537868023, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023104606196284294, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021306419745087624, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021102041006088257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013906040228903294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013454094529151917, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013353305868804455, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008117576129734516, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013906040228903294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013906040228903294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13065609335899353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09961261600255966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08766598999500275, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0721113532781601, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.054789118468761444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.043031565845012665, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07380260527133942, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06563831120729446, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06062982603907585, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04159488156437874, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03847424313426018, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.037480179220438004, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0320880264043808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.027299001812934875, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.026050148531794548, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.019693735986948013, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015822822228074074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015515468083322048, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013651623390614986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012903030961751938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01223883219063282, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012288420461118221, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01092855166643858, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010065276175737381, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013651623390614986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013651623390614986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17926743626594543, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16990935802459717, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16704250872135162, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1529003381729126, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08354485034942627, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0802847295999527, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09226781129837036, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08544870465993881, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08452677726745605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07634680718183517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07310432940721512, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04687764495611191, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04080570489168167, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03995541110634804, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.039754126220941544, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023410838097333908, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.020614510402083397, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020557107403874397, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019167324528098106, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0190422385931015, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012326639145612717, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012514293193817139, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012030210345983505, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008450605906546116, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012326639145612717, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012326639145612717, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22464196383953094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21295265853405, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20942652225494385, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19176070392131805, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10464149713516235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10055124759674072, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11547375470399857, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10697580128908157, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10585261136293411, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09564831107854843, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0915890708565712, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.058595068752765656, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05099034309387207, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04993068426847458, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.049679823219776154, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0292324461042881, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025480469688773155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025411704555153847, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02364347130060196, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023488841950893402, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015185406431555748, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015039841644465923, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0148138627409935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009610442444682121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0148138627409935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0148138627409935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.48.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1907551884651184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17680735886096954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17036888003349304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15506421029567719, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08629556000232697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08099232614040375, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0999603122472763, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09238310158252716, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08797207474708557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07801550626754761, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07468435168266296, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.050268929451704025, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.043815068900585175, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.041143909096717834, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04049738869071007, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025123631581664085, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.021181778982281685, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020913394168019295, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019552037119865417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01915006712079048, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013200712390244007, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013121379539370537, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012340988032519817, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008599703200161457, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013200712390244007, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013200712390244007, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06037287786602974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05669017136096954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0545056015253067, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04952400177717209, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0278865285217762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02610989660024643, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.032836366444826126, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.030338147655129433, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.028300095349550247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02524997480213642, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024236923083662987, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0165901780128479, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014501893892884254, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013416282832622528, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013150487095117569, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008336937054991722, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007034589070826769, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0069105313159525394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0065182470716536045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00635685445740819, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004471581429243088, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004566137678921223, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00414658896625042, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0031928597018122673, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014501893892884254, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014501893892884254, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05281161516904831, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.049491606652736664, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04710216447710991, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.0427091158926487, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.024155892431735992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02236064337193966, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029487742111086845, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.027020659297704697, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02451205812394619, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.021886607632040977, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.021151749417185783, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014830113388597965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.012893930077552795, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011601382866501808, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011271991766989231, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00744470814242959, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006028896197676659, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005865076091140509, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0055792792700231075, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005379675887525082, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003953579347580671, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00391332246363163, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0035614666994661093, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002605011686682701, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014830113388597965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014830113388597965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2015257179737091, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18995904922485352, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18609915673732758, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1695595234632492, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09372542798519135, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08945819735527039, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10496518015861511, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09659702330827713, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.0949692502617836, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08493687212467194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08103489875793457, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05306622013449669, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04597250372171402, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04467809572815895, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04438096657395363, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02647215686738491, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022582272067666054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02247512713074684, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020742330700159073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020549030974507332, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013523094356060028, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013044764287769794, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01297507993876934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007793880999088287, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013523094356060028, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013523094356060028, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.12071866542100906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08858783543109894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07067253440618515, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06121768802404404, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.051123566925525665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03645871952176094, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0742715373635292, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06762024760246277, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05689580738544464, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03849568963050842, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03856828063726425, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.038280729204416275, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03321799635887146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.025955157354474068, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.023972181603312492, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.019940093159675598, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015683893114328384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015023925341665745, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013748631812632084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012621255591511726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012191060930490494, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013029626570641994, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010234125889837742, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010825537145137787, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013748631812632084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013748631812632084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18317760527133942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17365780472755432, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.170758917927742, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15635889768600464, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08542623370885849, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08209498226642609, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09459031373262405, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08735735714435577, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08642303943634033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07807649672031403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07492052018642426, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.048100367188453674, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04173733666539192, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04087124764919281, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04066871851682663, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02405450865626335, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021125057712197304, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021065853536128998, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019652049988508224, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019519992172718048, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279534213244915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012873669154942036, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012495053932070732, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00876450166106224, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279534213244915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279534213244915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22617319226264954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21443188190460205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2109041064977646, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.193158358335495, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10538811981678009, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1012786477804184, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11648090928792953, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10773754119873047, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1066054180264473, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0963679850101471, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09233128279447556, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.059181153774261475, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.051377199590206146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05031590163707733, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.050066959112882614, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02953428216278553, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02571282722055912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025638705119490623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02386932075023651, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023708991706371307, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015451365150511265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015221867710351944, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015077846124768257, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009791792370378971, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009791792370378971, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009791792370378971, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.49.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19366464018821716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.17975705862045288, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17332817614078522, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1578008234500885, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08759862184524536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08236025273799896, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1019003614783287, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09382674098014832, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08927545696496964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07937560975551605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07604251801967621, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05120315030217171, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.044480592012405396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04178047552704811, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04112415760755539, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02566615864634514, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02153952233493328, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021264422684907913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01992114633321762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019518151879310608, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013651860877871513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013376173563301563, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01278759352862835, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008824877440929413, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013651860877871513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013651860877871513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06711247563362122, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06279286742210388, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.060483578592538834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05480360984802246, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030960174277424812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028985418379306793, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.036281608045101166, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03342893347144127, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03143666312098503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027907125651836395, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.026761725544929504, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0183849036693573, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015981260687112808, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014888600446283817, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014621326699852943, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009233811870217323, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00779368169605732, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007671144790947437, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00719486316666007, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007030710577964783, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004925945773720741, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005016174633055925, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004587125964462757, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0034924098290503025, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014888600446283817, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014888600446283817, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05795658752322197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05419524759054184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05140916258096695, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.046444013714790344, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.026389410719275475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02432309277355671, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03239183872938156, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.029688220471143723, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02680436708033085, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.023825697600841522, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.023021407425403595, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016300909221172333, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014134254306554794, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012663761153817177, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012308224104344845, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008172723464667797, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006594524253159761, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006407583132386208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006086943205446005, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005860195495188236, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004352862481027842, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004290089476853609, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0039053228683769703, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002860794309526682, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014134254306554794, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014134254306554794, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2044488489627838, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1919458657503128, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18788957595825195, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17071916162967682, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09464110434055328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09011580795049667, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10621181130409241, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09765240550041199, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09600615501403809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08545339107513428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.081508569419384, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05370587855577469, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04643431678414345, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04509379342198372, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0447503961622715, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.026754990220069885, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022783201187849045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022668849676847458, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02086738310754299, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02066652849316597, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01368116494268179, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013180981390178204, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013107775710523129, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007858012802898884, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01368116494268179, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01368116494268179, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1606142371892929, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12213682383298874, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10865738242864609, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08496477454900742, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06858047097921371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05590198189020157, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08561486750841141, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07837551832199097, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07399237155914307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.046227116137742996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04568473994731903, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.043514009565114975, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.037497684359550476, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0331200435757637, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.032013557851314545, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021902820095419884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017653964459896088, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0173642598092556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013539222069084644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012719097547233105, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012042657472193241, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012092276476323605, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010613287799060345, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008667475543916225, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013539222069084644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013539222069084644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18538790941238403, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1756846010684967, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17272838950157166, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15812663733959198, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08645373582839966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08306922018527985, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09562329947948456, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08841438591480255, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08746679127216339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07898397743701935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07563057541847229, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04864431172609329, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04223937541246414, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04135509580373764, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0411500446498394, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024307716637849808, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02135821245610714, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02129978872835636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019856728613376617, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019727196544408798, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012855996377766132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012985199689865112, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012552345171570778, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008799860253930092, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012855996377766132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012855996377766132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22868435084819794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21682658791542053, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21321892738342285, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1952376812696457, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10657712817192078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10243558138608932, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11768127977848053, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10895482450723648, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10782124102115631, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09742246568202972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09323245286941528, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05973372235894203, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05194459483027458, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05086733400821686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05061381682753563, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029787559062242508, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.025932585820555687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.025861728936433792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024058226495981216, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02389371395111084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01547553576529026, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015271317213773727, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015092595480382442, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009703900665044785, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009703900665044785, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009703900665044785, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.50.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1975347250699997, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18389584124088287, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17757752537727356, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1618773639202118, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08944389224052429, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08430768549442291, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1033058911561966, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09545865654945374, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09106241911649704, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0812324732542038, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07780349999666214, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05195172131061554, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.045183777809143066, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.042561717331409454, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.041927631944417953, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025909731164574623, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02177257277071476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0215047188103199, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020144658163189888, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019748134538531303, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013502583838999271, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013263647444546223, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012645181268453598, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00843412708491087, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013502583838999271, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013502583838999271, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07163333892822266, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0671650767326355, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06484904140233994, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05889824405312538, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0330648310482502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.031073104590177536, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03842751309275627, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.035489488393068314, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03354833275079727, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02989204041659832, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02862406149506569, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019434167072176933, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.016946466639637947, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015875527635216713, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.015613395720720291, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009748389944434166, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008278598077595234, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008160809986293316, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007656559813767672, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007493998389691114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005188987124711275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005278692580759525, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004855514969676733, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0036382456310093403, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009748389944434166, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009748389944434166, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.0640782043337822, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05995972454547882, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.057268176227808, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.0518498420715332, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02922184020280838, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.027189578860998154, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03500381484627724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03225444629788399, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.029675696045160294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026405468583106995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.025310205295681953, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01758522540330887, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015321300365030766, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014017568901181221, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01369447074830532, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008820352144539356, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007279368583112955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007115703541785479, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006717313546687365, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006510674487799406, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0046909041702747345, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.00466656219214201, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004282703623175621, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0031185117550194263, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014017568901181221, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014017568901181221, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2114519476890564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19871117174625397, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19461695849895477, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17681285738945007, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09778984636068344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09325093030929565, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10948214679956436, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10078535974025726, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09918607026338577, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08838935941457748, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08438602089881897, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05541333183646202, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.047930795699357986, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04659587889909744, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04627243056893349, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02764711156487465, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02357708103954792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02348082885146141, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02163739502429962, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02144140563905239, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014155508019030094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013661215081810951, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013580330647528172, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008234060369431973, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014155508019030094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014155508019030094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14321571588516235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.12335803359746933, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10775110870599747, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07992950826883316, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06433919072151184, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.052510373294353485, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08992097526788712, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08222618699073792, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06636297702789307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05064027011394501, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.045341622084379196, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04580218717455864, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03949525207281113, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0315195769071579, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.029351331293582916, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023274196311831474, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017446953803300858, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01640927977859974, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015023739077150822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013665063306689262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013207081705331802, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013228364288806915, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010873876512050629, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009867976419627666, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013665063306689262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013665063306689262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18880923092365265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.178856760263443, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17585496604442596, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16094870865345, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08805735409259796, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0845736563205719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09736587852239609, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09006252139806747, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08908502757549286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0804084986448288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07698241621255875, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04940645769238472, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04297894239425659, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0420808345079422, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04187089204788208, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024685019627213478, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021648375317454338, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02158801630139351, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020106632262468338, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0199726652354002, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012981357052922249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013044523075222969, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012667382135987282, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008688227273523808, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012981357052922249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012981357052922249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23035210371017456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21835246682167053, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21471759676933289, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19660356640815735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1074059009552002, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10319621115922928, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11866437643766403, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10981372743844986, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1086660698056221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09815209358930588, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09396723657846451, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06028832122683525, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05235885828733444, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.051266804337501526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05101675167679787, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03007190302014351, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02615968883037567, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026086648926138878, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02426132746040821, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024099811911582947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01567707769572735, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01542797964066267, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015294290147721767, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009840319864451885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009840319864451885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009840319864451885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.51.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.2028999775648117, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18902099132537842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1829126924276352, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1666986644268036, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09195519983768463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08686736226081848, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10551656782627106, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09750161319971085, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09351729601621628, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0835144966840744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0799868032336235, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.053090743720531464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04623948037624359, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04383011534810066, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04324856027960777, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026607342064380646, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022575771436095238, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022333253175020218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020921554416418076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02056257613003254, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014096374623477459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013917827047407627, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013324165716767311, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009188043884932995, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014096374623477459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014096374623477459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08413773775100708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07884808629751205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0762551799416542, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0691816434264183, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03883545473217964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03653740882873535, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04507572948932648, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04149617254734039, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03940991312265396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03507685661315918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.033661261200904846, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.022860420867800713, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.019870039075613022, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.018680179491639137, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.018388330936431885, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01152910478413105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009798543527722359, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009667220525443554, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009065208025276661, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00888916477560997, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006225846242159605, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006308338604867458, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005847014486789703, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004444402642548084, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01152910478413105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01152910478413105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07088913023471832, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06621992588043213, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06343426555395126, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05742425099015236, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.032218292355537415, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03001413494348526, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03847171738743782, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.035328689962625504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03272600471973419, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.029090918600559235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02803186886012554, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01934110000729561, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.016802646219730377, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01542686577886343, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015080047771334648, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009692332707345486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007958119735121727, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00779002346098423, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007329986896365881, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007119153160601854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005106727126985788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005019358359277248, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004669321235269308, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003274681745097041, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009692332707345486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009692332707345486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2066659927368164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1940673291683197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19007854163646698, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17234694957733154, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09505481272935867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09064020961523056, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10653342306613922, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09788164496421814, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09637616574764252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08589806407690048, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08184211701154709, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0537995882332325, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0465235561132431, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04522496089339256, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.044922295957803726, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02678842470049858, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02285768836736679, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02276158146560192, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02097744680941105, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02077339217066765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013709243386983871, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013238267041742802, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013146699406206608, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0079428656026721, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013709243386983871, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013709243386983871, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1478326916694641, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11593770235776901, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10416222363710403, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.09007507562637329, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06315536051988602, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05053383857011795, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08190960437059402, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07340594381093979, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0676269456744194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04963172227144241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.045913659036159515, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.041565053164958954, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.036458633840084076, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03200665861368179, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.030871925875544548, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022040151059627533, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019306402653455734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018995165824890137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01718960329890251, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01655283197760582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013985825702548027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015436640940606594, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012738281860947609, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013288215734064579, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013985825702548027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013985825702548027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1903654783964157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18038780987262726, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17733564972877502, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1623029261827469, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08884233981370926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08532930910587311, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0981920063495636, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09086733311414719, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08989723771810532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08112287521362305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07768003642559052, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04986906796693802, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043384406715631485, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04247191175818443, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04225985333323479, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024928264319896698, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021853333339095116, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021796176210045815, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020295344293117523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020163193345069885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013112778775393963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013181806541979313, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012798749841749668, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008790172636508942, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013112778775393963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013112778775393963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23205353319644928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21997761726379395, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21627697348594666, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19799531996250153, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10822000354528427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10397902876138687, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11954441666603088, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11065956950187683, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10949065536260605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09885707497596741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09469321370124817, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06065507233142853, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05277574062347412, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05167011171579361, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.051409222185611725, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030291832983493805, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02639179490506649, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026323476806282997, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024483006447553635, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024319520220160484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015819670632481575, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015614386647939682, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015435021370649338, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010025197640061378, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010025197640061378, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010025197640061378, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.52.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19853420555591583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18513929843902588, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17889954149723053, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16320210695266724, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08999185264110565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08494938910007477, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10379642993211746, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09583435207605362, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09153563529253006, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08182797580957413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07840348780155182, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.052083637565374374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.045402128249406815, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.0428696908056736, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.042249731719493866, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026000892743468285, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022006498649716377, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021743880584836006, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020398953929543495, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020013608038425446, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013563401997089386, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013500194996595383, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012735048308968544, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008749390952289104, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013563401997089386, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013563401997089386, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06384659558534622, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05978918448090553, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05741255730390549, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05211454629898071, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02948760986328125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02755052223801613, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0348220057785511, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03213370591402054, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.029940452426671982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.026634421199560165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.025557737797498703, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01762600988149643, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015359291806817055, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014198621734976768, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013913946226239204, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008879728615283966, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007463366258889437, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007327761966735125, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006904536858201027, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006726905703544617, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004822778515517712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0048709530383348465, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004470924846827984, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003427177667617798, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014198621734976768, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014198621734976768, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.053024016320705414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.049559831619262695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04685555025935173, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.042408011853694916, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.024209843948483467, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02224540151655674, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03007986955344677, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.027535296976566315, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.024584921076893806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02185593731701374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.021171029657125473, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015075488947331905, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01310476940125227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01165540050715208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011297815479338169, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007580829784274101, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006102598272264004, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005915498360991478, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005645181983709335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0054182019084692, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0040689497254788876, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0040406095795333385, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0036254997830837965, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002741786651313305, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01310476940125227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01310476940125227, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21523070335388184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20240671932697296, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1981533318758011, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1800532042980194, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09993961453437805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09529861807823181, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11194362491369247, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10293970257043839, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10143456608057022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09029372781515121, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08613082021474838, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05666114762425423, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04903425648808479, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04767080396413803, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04734470695257187, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028256123885512352, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024133140221238136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024034997448325157, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02211514301598072, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02191220223903656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014474272727966309, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014022822491824627, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013883539475500584, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008489856496453285, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014474272727966309, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014474272727966309, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1217656284570694, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09251739829778671, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07977033406496048, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06642922013998032, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05320269986987114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04175655543804169, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.06953133642673492, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06342422217130661, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05630365014076233, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03784344345331192, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03671136870980263, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03554931655526161, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.030594343319535255, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.026038238778710365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02485596388578415, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01804688200354576, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014427579008042812, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.013974129222333431, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.011757382191717625, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.010973398573696613, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.010346246883273125, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.010632874444127083, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.008982651866972446, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008170202374458313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014427579008042812, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014427579008042812, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19098378717899323, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18094006180763245, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17783306539058685, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16271564364433289, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08913040161132812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08560232818126678, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09855206310749054, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09117911010980606, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09019613265991211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08136938512325287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07789411395788193, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05004952475428581, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04352843761444092, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04261093959212303, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042392563074827194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0250124242156744, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02193770371377468, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021879158914089203, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02036806009709835, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020234985277056694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013186877593398094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013246545568108559, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01287020556628704, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00885968841612339, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013186877593398094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013186877593398094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23308928310871124, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22085081040859222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21716348826885223, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.198771670460701, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10870646685361862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10441121459007263, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12006437033414841, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1111668050289154, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10999695211648941, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09928290545940399, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09502004832029343, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06093619018793106, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05299253389239311, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05187384784221649, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05161063373088837, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03039184771478176, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02643122524023056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02635476179420948, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02449706196784973, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024330519139766693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015781991183757782, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015538555569946766, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015389790758490562, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009829413145780563, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009829413145780563, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009829413145780563, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.53.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20218075811862946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18861813843250275, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18250489234924316, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16648221015930176, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09157121181488037, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0865563303232193, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10531292855739594, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09725815802812576, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0931161642074585, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08332906663417816, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07984889298677444, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.052944015711545944, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.046039558947086334, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04356331378221512, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.042970363050699234, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026394696906208992, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02228807657957077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022039612755179405, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020658142864704132, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020283648744225502, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013750966638326645, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013545168563723564, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012934669852256775, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008638177067041397, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013750966638326645, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013750966638326645, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06667477637529373, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06246361881494522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06010676920413971, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.054493650794029236, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0307916346937418, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028811944648623466, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.036139972507953644, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.033360421657562256, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.031261250376701355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02779683656990528, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.026643842458724976, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01828087866306305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015949787572026253, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014812401495873928, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014536538161337376, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009180733002722263, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0077665033750236034, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007639672607183456, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0071783787570893764, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007011401932686567, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004913220182061195, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005027347709983587, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004566714633256197, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003520775819197297, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014812401495873928, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014812401495873928, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.055309753865003586, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05174429342150688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04894383251667023, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.0442771278321743, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0252807829529047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.023201944306492805, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03148437291383743, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.028753869235515594, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02565586566925049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.022888077422976494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022126851603388786, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01580834947526455, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013699124567210674, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012154459953308105, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011767174117267132, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007931268773972988, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006344013847410679, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006143272388726473, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005872434936463833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005631027277559042, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004238774999976158, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004181018564850092, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003767338814213872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00281442585401237, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013699124567210674, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013699124567210674, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21320408582687378, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20026513934135437, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19597630202770233, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17802254855632782, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09869802743196487, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09405913949012756, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11059483140707016, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10176686197519302, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10009793192148209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08913181722164154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08489831537008286, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.055926624685525894, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04842614382505417, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.047044914215803146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.046701349318027496, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027888337150216103, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023818103596568108, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02370261214673519, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021829113364219666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021619148552417755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01427703071385622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013828091323375702, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013698487542569637, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00835841242223978, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01427703071385622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01427703071385622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15327009558677673, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11757226288318634, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10148189961910248, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07871564477682114, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06387422978878021, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05008433386683464, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08716526627540588, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08003939688205719, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07123956084251404, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.046984583139419556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.044972777366638184, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.044256825000047684, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03841429203748703, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.031130772083997726, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02916352078318596, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022325215861201286, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.016982335597276688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01637938618659973, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013964378274977207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012638533487915993, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012147470377385616, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012563934549689293, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00975289847701788, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009137768298387527, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013964378274977207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013964378274977207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19153539836406708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1814051866531372, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1782984733581543, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16311463713645935, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08939898014068604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08584069460630417, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09882736951112747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0914531797170639, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09046639502048492, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08155582845211029, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0780671164393425, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05018743500113487, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04364238306879997, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04271692782640457, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04249787703156471, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025058144703507423, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02193966880440712, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021876392886042595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02035484090447426, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020217198878526688, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01315711997449398, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013168413192033768, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01283216942101717, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00870117824524641, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01315711997449398, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01315711997449398, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23483869433403015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22251586616039276, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21876682341098785, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.2002038061618805, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10958965122699738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10525613278150558, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12103122472763062, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1120757982134819, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1108739972114563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.1000380590558052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09574027359485626, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06145825237035751, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053422749042510986, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052300769835710526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05203252285718918, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030645640566945076, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026649270206689835, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026572320610284805, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024696839973330498, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02452898770570755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015922099351882935, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015667729079723358, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015526125207543373, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009916314855217934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009916314855217934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009916314855217934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.54.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.2050071358680725, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19171176850795746, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1856781244277954, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16946296393871307, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09293268620967865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08802597224712372, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1065271645784378, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09850001335144043, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09440968930721283, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08470651507377625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08119724690914154, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05353589728474617, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04657689854502678, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04416292905807495, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04358488693833351, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026649445295333862, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022499017417430878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02224896289408207, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020864877849817276, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02049759402871132, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013786603696644306, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013519417494535446, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012990385293960571, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008435209281742573, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013786603696644306, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013786603696644306, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07492109388113022, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07037483155727386, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06799578666687012, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.061768241226673126, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.034669242799282074, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.032617662101984024, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04011499881744385, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0372014194726944, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03514176979660988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.031375348567962646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030002424493432045, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.020285917446017265, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017764132469892502, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016649428755044937, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016379499807953835, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01018458604812622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008692451752722263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008567718788981438, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008043425157666206, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007877721451222897, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005434190854430199, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005550083238631487, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005092546343803406, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0038414851296693087, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01018458604812622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01018458604812622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06312897056341171, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05923536792397499, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05644217133522034, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05114833265542984, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.028919408097863197, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.026824386790394783, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03498710319399834, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.032224446535110474, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.029352577403187752, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02620568498969078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.025191988795995712, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.017609769478440285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015340001322329044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013873211108148098, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01350838877260685, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008824003860354424, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007193774450570345, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007012585643678904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006645979359745979, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0064268349669873714, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004678150173276663, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0046268850564956665, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004231583792716265, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003067199606448412, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013873211108148098, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.013873211108148098, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21973732113838196, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20687295496463776, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20286543667316437, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18453127145767212, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10190901160240173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09735994786024094, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11404931545257568, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10479684174060822, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10331486165523529, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09238900244235992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08817555010318756, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.057734884321689606, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04988236352801323, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.048574261367321014, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04826167970895767, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028780318796634674, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02455308474600315, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024463335052132607, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02256827987730503, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.022373497486114502, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014720245264470577, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01419330295175314, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014141134917736053, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008499606512486935, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014720245264470577, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014720245264470577, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.2046695351600647, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.13538193702697754, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10703685879707336, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08976257592439651, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.08598016202449799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06083020940423012, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.11202384531497955, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.1033567488193512, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.09487811475992203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05167026445269585, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.05444391071796417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05691824480891228, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0493171289563179, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.04152557998895645, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.0394858717918396, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.028416315093636513, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.022033266723155975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.02145516872406006, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015753556042909622, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014104343019425869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.015148192644119263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015365717932581902, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.012486059218645096, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010589979588985443, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014104343019425869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014104343019425869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19402049481868744, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18375612795352936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.18060597777366638, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16514939069747925, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.090547576546669, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08694425225257874, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10008617490530014, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09263461828231812, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09163898229598999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0826239287853241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07908593118190765, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.050835300236940384, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04420991986989975, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0432746522128582, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04305555298924446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02538113109767437, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022234374657273293, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02217116765677929, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020630335435271263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020490054041147232, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01330405194312334, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013356728479266167, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012974895536899567, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008837600238621235, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01330405194312334, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01330405194312334, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23583854734897614, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2234152853488922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21965892612934113, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20094430446624756, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11002439260482788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10566266626119614, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12143625319004059, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11251197755336761, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11132698506116867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10042893141508102, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09609462320804596, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06163398176431656, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053622446954250336, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05249781161546707, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05223197117447853, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03072933293879032, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026730069890618324, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026658963412046432, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024764426052570343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024594422429800034, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015924081206321716, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015685705468058586, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015522873029112816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009885512292385101, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009885512292385101, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009885512292385101, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.55.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.204334557056427, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19117961823940277, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1849985420703888, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16889357566833496, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0926777794957161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08775071799755096, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10675542056560516, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09848891943693161, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09416690468788147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08456067740917206, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08104365319013596, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05366816744208336, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04662081599235535, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.044102687388658524, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04349224269390106, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02682306058704853, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02255571074783802, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022292697802186012, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02094615064561367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02056702971458435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014089507050812244, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013700375333428383, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01326617132872343, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008728117682039738, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014089507050812244, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014089507050812244, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06844660639762878, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0642542764544487, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06175699084997177, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05608857423067093, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.031749188899993896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.029695970937609673, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03746721148490906, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.034547142684459686, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0322050116956234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02874382585287094, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02760276570916176, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019035641103982925, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.016629500314593315, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015367483720183372, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.015058047138154507, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009669923223555088, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008218224160373211, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00807348545640707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00764627056196332, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007460205815732479, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005386128090322018, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005533669609576464, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005013857968151569, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004088378511369228, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009669923223555088, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009669923223555088, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05932348594069481, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0556158609688282, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05276481434702873, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04775160178542137, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.027116362005472183, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.025045547634363174, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03316865116357803, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.030533580109477043, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.027503445744514465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.024515297263860703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.023617466911673546, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016659483313560486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014498502016067505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01300774235278368, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012633858248591423, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008361021988093853, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006760595366358757, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006571624893695116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00624883035197854, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0060203648172318935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004436367657035589, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004383882973343134, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003979676403105259, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0029080123640596867, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014498502016067505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.014498502016067505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2148631066083908, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20214775204658508, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19810041785240173, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18004339933395386, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0996275395154953, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09501864016056061, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11153765022754669, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10264280438423157, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10098884999752045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09014900028705597, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08597048372030258, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.056421224027872086, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.048846516758203506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04748786240816116, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0471629872918129, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028120093047618866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02399074286222458, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02389177680015564, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02202567458152771, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021813655272126198, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01438097469508648, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013875219970941544, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013810121454298496, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008307388052344322, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01438097469508648, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01438097469508648, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1114964559674263, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08426515012979507, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06919509917497635, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.053238388150930405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.045380014926195145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03490673378109932, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.06790070980787277, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06204313412308693, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0516800619661808, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03605111688375473, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03521376848220825, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03486320376396179, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03159680962562561, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.024652346968650818, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.022716045379638672, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.018633943051099777, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.016834337264299393, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.016265081241726875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015659453347325325, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014739615842700005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011879503726959229, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015152430161833763, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010066702961921692, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.013603822328150272, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014739615842700005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014739615842700005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1931740939617157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18293249607086182, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17976196110248566, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1643662005662918, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09015611559152603, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08653617650270462, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09963607788085938, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09223455935716629, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09123558551073074, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08223263919353485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07862667739391327, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05055473372340202, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.044012054800987244, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04307541251182556, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04285388067364693, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025253169238567352, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022108713164925575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022044077515602112, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020502353087067604, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02036205679178238, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013208718970417976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013246028684079647, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012883193790912628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008711360394954681, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013208718970417976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013208718970417976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2357093244791031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22331221401691437, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21949587762355804, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.200786292552948, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11001013219356537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1056269109249115, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1214255839586258, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11250792443752289, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11129117757081985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10037560015916824, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09594380855560303, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06160688400268555, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05362463742494583, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05248676612973213, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.052218154072761536, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030711906030774117, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026724068447947502, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026648078113794327, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024746503680944443, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024576865136623383, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015878865495324135, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015671756118535995, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015477944165468216, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009864572435617447, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009864572435617447, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009864572435617447, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.56.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.2048707902431488, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19179211556911469, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18559354543685913, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1695357710123062, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09292121976613998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0879807248711586, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10737883299589157, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09878190606832504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09438738971948624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08482321351766586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08137071132659912, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05388236045837402, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04676692187786102, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.044225454330444336, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04361555352807045, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026998164132237434, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0226492527872324, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022382929921150208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02105106972157955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020673023536801338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014310324564576149, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013803040608763695, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01348530501127243, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008849270641803741, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014310324564576149, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014310324564576149, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06930576264858246, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06495615839958191, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06258475035429001, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05677711218595505, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03207723796367645, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.030052384361624718, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.037390921264886856, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.034641776233911514, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03256554529070854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.028959957882761955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02768196538090706, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01890602335333824, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01656428724527359, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.015414416790008545, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01513664796948433, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009477331303060055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008057800121605396, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007931702770292759, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007444879040122032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0072734118439257145, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005056761205196381, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005181263666599989, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004709081724286079, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0035950751043856144, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009477331303060055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009477331303060055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.055877264589071274, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.052309006452560425, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.0493672676384449, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.044672347605228424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.025588123127818108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.023466072976589203, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.031855057924985886, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02922418713569641, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.025972289964556694, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02314935065805912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.022392451763153076, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.016031792387366295, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013905421830713749, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012301383540034294, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.011893617920577526, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00804093200713396, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006415243726223707, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006214706227183342, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005935849156230688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005693288054317236, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004290938377380371, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004228033125400543, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0038096834905445576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0028409617953002453, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013905421830713749, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013905421830713749, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22374507784843445, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.210317462682724, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.2060985565185547, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18715229630470276, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10399407893419266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09911064803600311, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1163986474275589, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10706929117441177, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10541874170303345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0938941016793251, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08948160707950592, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0589030385017395, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05097028985619545, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.049553997814655304, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04920273274183273, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02936144918203354, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025061149150133133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02493986114859581, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022967617958784103, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.022751010954380035, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01502823643386364, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014495843090116978, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014420680701732635, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008691942319273949, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014495843090116978, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014495843090116978, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14698563516139984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1106325089931488, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09341101348400116, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07695093005895615, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06305408477783203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.048213426023721695, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08449181914329529, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07799135893583298, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0684744119644165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04437127336859703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04449189826846123, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.043022520840168, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03724414110183716, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.0305024404078722, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02871544286608696, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021521523594856262, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01625051721930504, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015590721741318703, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012793082743883133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.011485437862575054, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011541364714503288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.011577041819691658, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009347645565867424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.007956961169838905, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012793082743883133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012793082743883133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19259141385555267, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18236902356147766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1792508065700531, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1639515459537506, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0899646207690239, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08636271208524704, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09948032349348068, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09204012900590897, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09103735536336899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08205916732549667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07849623262882233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05053919181227684, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043943118304014206, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04301166161894798, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04279252514243126, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025249594822525978, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022124461829662323, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02206134982407093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02053103968501091, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020389730110764503, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013282520696520805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.0133309131488204, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012956265360116959, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008873650804162025, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013282520696520805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013282520696520805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23606319725513458, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22357125580310822, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21974575519561768, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20103181898593903, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1102009266614914, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10580019652843475, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12173102796077728, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11271025240421295, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11150394380092621, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10055379569530487, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09615878760814667, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06180519610643387, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053746819496154785, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052606914192438126, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.052339956164360046, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03084455616772175, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026818912476301193, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02674618363380432, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024841798469424248, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024672972038388252, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01604558154940605, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015789054334163666, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015645908191800117, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010022755712270737, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010022755712270737, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010022755712270737, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.57.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19981905817985535, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18706779181957245, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18078318238258362, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1651851087808609, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.090695820748806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08577098697423935, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10470747947692871, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09675078839063644, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09214961528778076, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08283045142889023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07947912812232971, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.052736956626176834, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04586190730333328, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04321735352277756, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04257611185312271, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026436233893036842, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0222163163125515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0219340231269598, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020668543875217438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020272886380553246, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01404685527086258, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013680333271622658, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013209325261414051, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008917608298361301, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01404685527086258, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01404685527086258, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06130893528461456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05734136328101158, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.054941534996032715, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.049805279821157455, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.028322065249085426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.026371270418167114, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03354591503739357, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.031068675220012665, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.028774069622159004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.025556810200214386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024478217586874962, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.016960253939032555, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014871126040816307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01364427711814642, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013346296735107899, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00852141622453928, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0071834078989923, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007042907178401947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006646464113146067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00646186014637351, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004590589087456465, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0047185723669826984, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004221807233989239, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003328918479382992, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014871126040816307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014871126040816307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.046763624995946884, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.043750520795583725, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.040911637246608734, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03703416883945465, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.021413110196590424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.01947150006890297, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.027424845844507217, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.024978388100862503, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02175532467663288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.019376259297132492, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.018823135644197464, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013767614960670471, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.011927353218197823, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01035334076732397, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009946928359568119, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006958249025046825, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005467810668051243, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005255748983472586, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005074850749224424, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004824028350412846, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0037627352867275476, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0037232027389109135, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003285312093794346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0025636665523052216, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013767614960670471, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013767614960670471, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21488730609416962, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2018936574459076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19774439930915833, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17965419590473175, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.1000577062368393, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09531117230653763, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11227086931467056, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10326740890741348, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10149472206830978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09036751091480255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08609404414892197, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05687984451651573, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04919920489192009, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04773653671145439, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.047397516667842865, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028326356783509254, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024177690967917442, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024050695821642876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022156642749905586, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021931763738393784, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014510458335280418, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014048010110855103, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013908480294048786, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008479323238134384, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014510458335280418, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014510458335280418, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.139926016330719, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10581314563751221, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08784129470586777, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07298635691404343, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05965907499194145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04424682632088661, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0840626135468483, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07691831886768341, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06603533029556274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.044989705085754395, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04348602890968323, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04290078952908516, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03754860535264015, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.029895147308707237, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.027831722050905228, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02182159386575222, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017519932240247726, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.016847768798470497, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015150806866586208, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013940754346549511, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012409172020852566, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01408515963703394, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010090372525155544, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011427217163145542, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013940754346549511, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013940754346549511, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19151657819747925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18128404021263123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17818033695220947, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16290655732154846, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08948266506195068, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08590119332075119, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09894360601902008, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09156200289726257, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09056683629751205, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08160414546728134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07806070894002914, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.050313644111156464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043727997690439224, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04279584437608719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04257708042860031, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02513921447098255, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022034166380763054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021972080692648888, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020445525646209717, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020305031910538673, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013246922753751278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013309487141668797, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012917621992528439, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008891203440725803, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013246922753751278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013246922753751278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23599840700626373, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2234628051519394, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2196575552225113, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.2008972465991974, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11019997298717499, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1058061346411705, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12169677764177322, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11273156851530075, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11154334247112274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10052317380905151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0960867628455162, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.061799269169569016, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05374462157487869, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05260308086872101, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05233258754014969, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030811838805675507, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026803791522979736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026730980724096298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02482045255601406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02464962564408779, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01599297858774662, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015759466215968132, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015589769929647446, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009969562292098999, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009969562292098999, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009969562292098999, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.58.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20192639529705048, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18912076950073242, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18279632925987244, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16711057722568512, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09163772314786911, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08669417351484299, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10599961876869202, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09780075401067734, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09307946264743805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0837554931640625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08041070401668549, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05321276932954788, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0463295616209507, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04366688430309296, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04302322119474411, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0265550147742033, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02246278151869774, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0221798587590456, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.0209145937114954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020516978576779366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01389569416642189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013853652402758598, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013024401850998402, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009065985679626465, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01389569416642189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01389569416642189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06470978260040283, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06057627871632576, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05805504694581032, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05263431742787361, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030024247244000435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.027960460633039474, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03579661250114441, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.032907284796237946, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.030488913878798485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027085818350315094, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02605612576007843, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.018217632547020912, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015808066353201866, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014524435624480247, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014208991080522537, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009234879165887833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007716511841863394, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007569774053990841, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007151967380195856, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006963132414966822, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005026712082326412, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005122387316077948, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004651538096368313, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0036886194720864296, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014524435624480247, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014524435624480247, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.048860445618629456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0456748828291893, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.042638421058654785, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.038565006107091904, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02236248552799225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0202777236700058, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.028602585196495056, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.026202186942100525, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.022721143439412117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.020243126899003983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.019633498042821884, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014370853081345558, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01248482707887888, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.010787125676870346, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.010364064015448093, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007226181216537952, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005679992958903313, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005460316315293312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0052649215795099735, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.005003268830478191, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003895157016813755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0038544272538274527, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0033873054198920727, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0026384915690869093, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014370853081345558, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.014370853081345558, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2089812308549881, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19620242714881897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19197218120098114, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17392966151237488, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.096909299492836, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09220286458730698, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.108778215944767, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.1000656858086586, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09833646565675735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0873432606458664, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08314801752567291, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.055012620985507965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04761563241481781, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0461886003613472, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04584866389632225, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027413224801421165, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023339606821537018, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023230860009789467, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02135433815419674, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021146664395928383, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01401465479284525, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013527469709515572, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013421845622360706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008072754368185997, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01401465479284525, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01401465479284525, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.14675936102867126, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1009359359741211, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07814666628837585, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06965450942516327, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.060238149017095566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04059670865535736, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08726739883422852, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07929905503988266, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06783545762300491, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04358397424221039, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04381786286830902, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.044552940875291824, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0382775217294693, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.029863497242331505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.027549827471375465, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02264457754790783, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01712794415652752, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01638723909854889, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014400242827832699, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01294944528490305, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013005169108510017, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013612370938062668, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010486855171620846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010644215159118176, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014400242827832699, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014400242827832699, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19123974442481995, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1809966266155243, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17787063121795654, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16260722279548645, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0893627256155014, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08577009290456772, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09882170706987381, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09143709391355515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09044200927019119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08145974576473236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0779266357421875, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05017484724521637, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0436583086848259, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0427275188267231, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04250151291489601, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025069495663046837, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021976618096232414, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02191474474966526, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02037936821579933, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020244477316737175, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013165837153792381, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013241520151495934, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01284191943705082, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008806059136986732, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013165837153792381, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013165837153792381, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23759466409683228, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22498536109924316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22111953794956207, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20218335092067719, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11097745597362518, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10652686655521393, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12248022109270096, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11352573335170746, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11230987310409546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10118880122900009, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09668543934822083, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06218191236257553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05410293489694595, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052960123866796494, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05268934369087219, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031006257981061935, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02697068639099598, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026895513758063316, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024961354210972786, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02478805184364319, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.016068963333964348, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015829890966415405, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015661220997571945, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009973716922104359, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009973716922104359, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009973716922104359, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.59.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19479727745056152, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18259674310684204, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17636851966381073, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16126251220703125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0885346457362175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08366785943508148, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10227564722299576, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09469139575958252, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0899304449558258, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0809800997376442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07771145552396774, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05147852003574371, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.044966764748096466, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04222240298986435, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04156585782766342, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.025775926187634468, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02178778685629368, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021484635770320892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02030627802014351, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.019893761724233627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0135871022939682, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013550066389143467, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012729162350296974, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008953460492193699, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0135871022939682, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.0135871022939682, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.045972272753715515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04283832758665085, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04025495424866676, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.03639710322022438, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02120845392346382, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.019327437505126, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.026335103437304497, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.024374105036258698, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02159290947020054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.019078882411122322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.018313227221369743, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013280084356665611, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01165258139371872, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.010249752551317215, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.009895912371575832, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.006698125042021275, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.005450091790407896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005274160765111446, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005041120108217001, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.004824153613299131, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0036727325059473515, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.003725319867953658, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003265651408582926, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0026446543633937836, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013280084356665611, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.013280084356665611, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.02719314582645893, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.025331899523735046, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.021981047466397285, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.019884740933775902, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.012309486977756023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.010312524624168873, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.018181337043642998, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.016560804098844528, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.012538105249404907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.011208285577595234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.011093487963080406, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.009093774482607841, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.007887979969382286, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.006015697494149208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.005487990099936724, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.004619555547833443, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0032974702771753073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.003028672654181719, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.003097142791375518, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.002793216146528721, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0026179132983088493, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.002530535450205207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0021132472902536392, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0018124047201126814, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.012538105249404907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.012538105249404907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2147817760705948, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2011643946170807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.196674183011055, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17822659015655518, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09995204955339432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09495582431554794, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11246463656425476, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10338610410690308, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10149052739143372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08984575420618057, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0854765847325325, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05700412020087242, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0492396242916584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.047682687640190125, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.047334689646959305, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.028412118554115295, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024117184802889824, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024005195125937462, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02201261557638645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02177484892308712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014528760686516762, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013998701237142086, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013861851766705513, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008382152765989304, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014528760686516762, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014528760686516762, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.12058781087398529, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0925537496805191, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06859162449836731, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06266531348228455, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.04944508150219917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.032267823815345764, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08371381461620331, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07594674825668335, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.056602802127599716, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04215199872851372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04282817617058754, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.042952921241521835, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03654715046286583, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.024901820346713066, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.021324915811419487, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021922986954450607, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014591768383979797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.013226435519754887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013386615552008152, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.011393137276172638, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01268811896443367, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012513929978013039, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009372899308800697, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009506654925644398, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014591768383979797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014591768383979797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.191354438662529, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1811039298772812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17795555293560028, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1627369523048401, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08944137394428253, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08584648370742798, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09897015243768692, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09152600914239883, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09053212404251099, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08152306824922562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07803507149219513, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05025105178356171, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043715961277484894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.042781755328178406, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0425633005797863, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025127941742539406, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022040504962205887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02197575382888317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02044127881526947, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020301103591918945, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013239213265478611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013325527310371399, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012916138395667076, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008926449343562126, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013239213265478611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013239213265478611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23896993696689606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22620584070682526, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22236911952495575, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20328402519226074, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11164819449186325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10715320706367493, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1232355535030365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11419383436441422, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11297927796840668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10176938027143478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0972888320684433, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0625591054558754, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05444994941353798, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05329502373933792, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05301797389984131, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031199095770716667, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.027156908065080643, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.027080832049250603, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.025132054463028908, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024957070127129555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.016173560172319412, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015969526022672653, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015761863440275192, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010106002911925316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010106002911925316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010106002911925316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.60.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20362702012062073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1910848468542099, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18468569219112396, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16898788511753082, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09248916804790497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08758699148893356, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1073891669511795, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09878088533878326, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09388834238052368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08470160514116287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08143573999404907, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05397708714008331, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04680679738521576, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.044075608253479004, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04342036694288254, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.027102919295430183, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022675830870866776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022378623485565186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.021144984290003777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020741241052746773, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014463466592133045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013992615975439548, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013577987439930439, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009149983525276184, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014463466592133045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014463466592133045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06035967171192169, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.056447505950927734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05415008217096329, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.049091070890426636, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.027970757335424423, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.026079092174768448, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03296744078397751, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03052605874836445, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02842867374420166, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.025196615606546402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024091556668281555, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.016685396432876587, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01461009867489338, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013484658673405647, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013207043521106243, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00839295145124197, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0071110911667346954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006983438041061163, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006571514997631311, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006403226871043444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004536858294159174, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004671905189752579, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004192472435534, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003323239041492343, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01461009867489338, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01461009867489338, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.041934579610824585, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.03914543241262436, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03639109805226326, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03290858119726181, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.019175047054886818, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.01731867901980877, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.02490735799074173, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.022630054503679276, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.019482530653476715, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.017337363213300705, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.016853027045726776, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012509286403656006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010765988379716873, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009266342967748642, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.008871282450854778, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006306212395429611, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00487913703545928, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0046779196709394455, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004525431897491217, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004283966030925512, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003408485557883978, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003331343410536647, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.00294028059579432, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002277612453326583, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012509286403656006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012509286403656006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22617629170417786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2122420072555542, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.2078608125448227, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18867361545562744, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10526125133037567, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.10021903365850449, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1178150624036789, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10846703499555588, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10679526627063751, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09492220729589462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.09033864736557007, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05962367355823517, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.051645707339048386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.05018595978617668, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.049833036959171295, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02971906028687954, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02537389285862446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02526228502392769, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.023205287754535675, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.022984405979514122, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.015188333578407764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01467764563858509, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01458846963942051, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00878719799220562, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01467764563858509, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01467764563858509, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13662858307361603, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10263777524232864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08176388591527939, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.073984295129776, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.058008600026369095, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0392816960811615, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08543962985277176, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07840661704540253, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06453645974397659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.045608412474393845, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04488406330347061, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04356768727302551, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.037619106471538544, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.028523115441203117, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.025951633229851723, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02200002409517765, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01576363854110241, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014797654002904892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013548809103667736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.011909282766282558, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01218986976891756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012228674255311489, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009393622167408466, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008847602643072605, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014797654002904892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014797654002904892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19111143052577972, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1808510720729828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17770814895629883, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16237612068653107, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08936531841754913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08575531095266342, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09881795942783356, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09145550429821014, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09045010805130005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08141034841537476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07784739881753922, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0501832515001297, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043683867901563644, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04274485632777214, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04252780228853226, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025068391114473343, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.0220062006264925, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02194387838244438, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020406214520335197, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020264962688088417, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013164079748094082, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.0132888313382864, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012837521731853485, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00887801218777895, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013164079748094082, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013164079748094082, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23989492654800415, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22705954313278198, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22317440807819366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20397521555423737, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11208187788724899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10757119208574295, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1237800195813179, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11468324065208435, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11343751847743988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10216651856899261, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09760411828756332, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06285390257835388, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.054675161838531494, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05352054536342621, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05324047803878784, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03134132921695709, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.027256647124886513, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.027179798111319542, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.025218607857823372, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02504136599600315, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.016241300851106644, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.016009874641895294, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015826188027858734, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01010279729962349, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01010279729962349, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01010279729962349, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.61.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20507946610450745, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19264736771583557, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18615111708641052, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.17045968770980835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0931507870554924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0882052630186081, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10784736275672913, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09962722659111023, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09452477097511292, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0854000672698021, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0820903405547142, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05422796308994293, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04711762070655823, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.044302813708782196, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.043618399649858475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.027062436565756798, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022639289498329163, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022328758612275124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.021106181666254997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02068302035331726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014153704047203064, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013769609853625298, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013233460485935211, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008691051974892616, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014153704047203064, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014153704047203064, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06450414657592773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06031028553843498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05790729820728302, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05244243144989014, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02990206703543663, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.027884937822818756, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03519933670759201, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.032591111958026886, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0303739495575428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.026924585923552513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.025716304779052734, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.017804116010665894, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015618161298334599, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014432178810238838, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014139994978904724, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008960115723311901, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007631431333720684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007495943922549486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007051644381135702, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006876458413898945, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004843129310756922, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005035312846302986, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004481880459934473, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003616427071392536, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014432178810238838, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.014432178810238838, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.04578595608472824, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.04275207966566086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04001779854297638, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.036215443164110184, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.021038005128502846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.019114559516310692, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.02695983089506626, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.024482114240527153, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.021410977467894554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.01901068724691868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.018493689596652985, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013606514781713486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.011689245700836182, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.010178585536777973, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.009793464094400406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00683625927194953, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005371287930756807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005172626581043005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0049769142642617226, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004740758799016476, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.00368725648149848, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0036449588369578123, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0032229023054242134, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0025216087233275175, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013606514781713486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.013606514781713486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22418570518493652, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2106260359287262, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.2061721533536911, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.187249556183815, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.1045890524983406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09963645040988922, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11707735061645508, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10782252997159958, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10607996582984924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.0942949429154396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0897773951292038, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05923933535814285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05135741084814072, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04986447095870972, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.049516092985868454, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.029539380222558975, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025190245360136032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.025085965171456337, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.023030171170830727, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02279730513691902, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.015095816925168037, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014539133757352829, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014488456770777702, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008627722039818764, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014539133757352829, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014539133757352829, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.09185118228197098, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07193870842456818, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06461485475301743, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.055282775312662125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.04052802920341492, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03234773874282837, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.05019788071513176, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04604274779558182, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.042878154665231705, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.030243827030062675, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02831287682056427, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.025568220764398575, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.022070739418268204, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.019597452133893967, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.018977230414748192, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01285670604556799, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.010463978163897991, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.010262412950396538, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008547387085855007, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008117757737636566, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006987381726503372, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.007152913138270378, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.006140911020338535, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.005152794532477856, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01285670604556799, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01285670604556799, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19124503433704376, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18089450895786285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17774973809719086, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16244299709796906, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08944229036569595, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08580461144447327, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0989186242222786, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09154044091701508, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09054352343082428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08146107941865921, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07785528898239136, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05022694915533066, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043725863099098206, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.042785294353961945, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042561277747154236, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025088347494602203, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022030137479305267, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021968334913253784, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020421432331204414, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02028016559779644, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013168469071388245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013311717659235, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012838095426559448, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008896269835531712, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013168469071388245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013168469071388245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23992136120796204, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2270609736442566, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22316336631774902, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.2038910984992981, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1121375635266304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1075693815946579, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1238052099943161, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11471185833215714, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11347641050815582, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.1021481603384018, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0975278913974762, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06282256543636322, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05468021705746651, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05350892245769501, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05323430895805359, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031325504183769226, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.027237512171268463, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02715790644288063, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.025184325873851776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.025009842589497566, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01619749702513218, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015966292470693588, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.01578090712428093, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01003072690218687, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01003072690218687, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.01003072690218687, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.62.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20231223106384277, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.189964160323143, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18344159424304962, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16797661781311035, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09197460114955902, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08704455196857452, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10691564530134201, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09855309873819351, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09336885809898376, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08431563526391983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08100912719964981, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05353476107120514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04677586629986763, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04394083470106125, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04325046390295029, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026689667254686356, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0227925144135952, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02248210459947586, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02130601927638054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02088763751089573, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013895395211875439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014338571578264236, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012959831394255161, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009703237563371658, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013895395211875439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013895395211875439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.060833416879177094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05687595531344414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05428456515073776, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.049235787242650986, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.028188258409500122, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.026122238487005234, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.034059032797813416, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03120492957532406, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02863866090774536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.025420362129807472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024438846856355667, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.017258048057556152, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01499300729483366, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013641357421875, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013309026136994362, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008750560693442822, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007259170524775982, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007098651956766844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006718938238918781, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006517174653708935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004799171816557646, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004861717112362385, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0043988157995045185, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003497266210615635, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013641357421875, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013641357421875, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.042349960654973984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.03947436809539795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03673136606812477, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03327234834432602, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.019457504153251648, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.017513087019324303, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.025163263082504272, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.022922370582818985, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.01978941075503826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.017548654228448868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.017017943784594536, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012689951807260513, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010959291830658913, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009403168223798275, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.008996854536235332, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006395585369318724, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.004958672448992729, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004755532834678888, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004587753210216761, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004345479886978865, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0034625353291630745, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003390488913282752, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.003000079421326518, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0023258309811353683, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012689951807260513, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.012689951807260513, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2118827849626541, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19894035160541534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1945842206478119, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17642958462238312, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09869204461574554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09395936876535416, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11064394563436508, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10198534280061722, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10016661882400513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08889797329902649, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08456861227750778, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05602450668811798, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04860702157020569, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.047132451087236404, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04678189754486084, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027936922386288643, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02389143407344818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023778241127729416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021867934614419937, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02164650522172451, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01433971431106329, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013964870944619179, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013754438608884811, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008533536456525326, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01433971431106329, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01433971431106329, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.134263277053833, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10800783336162567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08518176525831223, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0727178156375885, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05669669434428215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04106377810239792, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0927879810333252, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0832582488656044, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06359437853097916, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04760418459773064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04855902120471001, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04731367900967598, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04051820933818817, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.02890971302986145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02547040581703186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02447950281202793, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01751708984375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01623961515724659, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.016090113669633865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014263851568102837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014389028772711754, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015070727095007896, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011041099205613136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.012156587094068527, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014263851568102837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014263851568102837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.19023235142230988, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17992162704467773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1767738312482834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1615159511566162, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08902773261070251, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0853976160287857, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0985834077000618, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09110967814922333, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0901128500699997, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08108387142419815, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07749368995428085, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05013449490070343, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04355667531490326, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04261578619480133, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042390987277030945, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025100907310843468, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021995920687913895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021936306729912758, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020403672009706497, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020266130566596985, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01333138532936573, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013372436165809631, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013008186593651772, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009035324677824974, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01333138532936573, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01333138532936573, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23884539306163788, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22597724199295044, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22210228443145752, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.2029019147157669, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11164882034063339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10711933672428131, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12332481890916824, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11421919614076614, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11298903077840805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10170084983110428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0971112996339798, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06262405216693878, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05446331202983856, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05330108478665352, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.053025826811790466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031229032203555107, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.027138596400618553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.027063090354204178, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.025094766169786453, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024918852373957634, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.016196973621845245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015923017635941505, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015784407034516335, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010027197189629078, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010027197189629078, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010027197189629078, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.63.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20400889217853546, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19158624112606049, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18527007102966309, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1697143018245697, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09276697784662247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08788963407278061, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10726593434810638, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09904522448778152, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09418872743844986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08510322868824005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08174880594015121, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.053884513676166534, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.047042470425367355, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04428704082965851, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04361691325902939, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02701311931014061, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022905660793185234, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022609956562519073, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.021416062489151955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.021010229364037514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014288799837231636, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014310322701931, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013407083228230476, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009594514966011047, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014288799837231636, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014288799837231636, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0655965581536293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.061423737555742264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.05901732295751572, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05353900045156479, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030420226976275444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.02843853086233139, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03571956977248192, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03308745473623276, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.030897092074155807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027458731085062027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02624320052564144, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01808391511440277, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.0158358383923769, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01465686596930027, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014369271695613861, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.009089280851185322, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007722421083599329, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007591168861836195, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007145279087126255, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006971246097236872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004902746062725782, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0050561269745230675, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004541839938610792, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003587801242247224, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01465686596930027, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01465686596930027, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.05107901990413666, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.047732993960380554, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.04494131729006767, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04063447564840317, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.023344432935118675, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.021322503685951233, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.029300976544618607, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02684761770069599, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.023708991706371307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.021078437566757202, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02038457617163658, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01472669281065464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.012769797816872597, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.011228015646338463, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.010832239873707294, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007394479587674141, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.005861423444002867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.005662556737661362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005420273169875145, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0051802354864776134, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.003954604733735323, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0038893744349479675, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0034657709766179323, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0026110480539500713, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01472669281065464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01472669281065464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22314952313899994, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20975051820278168, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20551447570323944, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1865340769290924, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10388804972171783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09903250634670258, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11623011529445648, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10704779624938965, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10532989352941513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09379079937934875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08923941105604172, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.058803122490644455, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05096428841352463, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04953779652714729, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04917624220252037, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02931230142712593, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025010718032717705, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.024902626872062683, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022914282977581024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.022684015333652496, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014974330551922321, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014422469772398472, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014385507442057133, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008569629862904549, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014422469772398472, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014422469772398472, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17777234315872192, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11194810271263123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07641009241342545, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07824230194091797, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07094752788543701, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.038149867206811905, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10631408542394638, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09711474180221558, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08260586112737656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05051685497164726, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.0519966185092926, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0542430579662323, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04648994281888008, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03488269820809364, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03157472237944603, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.027225399389863014, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019391199573874474, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01836099661886692, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015859713777899742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013625315390527248, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.014903999865055084, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.015218760818243027, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01106783002614975, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.011069688946008682, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013625315390527248, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013625315390527248, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18798884749412537, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1777612715959549, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17462760210037231, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15950298309326172, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08795369416475296, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08435647189617157, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09725432842969894, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09002922475337982, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08904203027486801, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08008243888616562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0765310600399971, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.049453046172857285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0430200956761837, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0420883372426033, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04186955839395523, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02471027337014675, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02170245535671711, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021639622747898102, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02011607214808464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01997843012213707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013023978099226952, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013155676424503326, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012699386104941368, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008845650590956211, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013023978099226952, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013023978099226952, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23726633191108704, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.22447305917739868, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22059209644794464, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20156534016132355, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11097733676433563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.1064547747373581, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12258591502904892, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11353649944067001, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.1123187392950058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10107938945293427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09653280675411224, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0622587576508522, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05417292192578316, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05301447957754135, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05273463949561119, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031093358993530273, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.027052776888012886, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026978911831974983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.025027921423316002, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024854423478245735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01622365415096283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01596907526254654, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015813831239938736, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010192152112722397, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010192152112722397, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010192152112722397, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.64.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20242708921432495, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18989306688308716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18342946469783783, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16794218122959137, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09196261316537857, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08699310570955276, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10649818181991577, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0984451174736023, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.0934491828083992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08429737389087677, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08104028552770615, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05356460437178612, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04665990546345711, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04380011931061745, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04310884326696396, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026784196496009827, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02246657758951187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.022159075364470482, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020958462730050087, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020533472299575806, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014074992388486862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013799656182527542, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013170461170375347, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008888551965355873, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014074992388486862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014074992388486862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0536394827067852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05004554241895676, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04773201420903206, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04317503422498703, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.024835824966430664, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.022985782474279404, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.02975539118051529, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.02753482758998871, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.025261646136641502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02233370766043663, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.021345267072319984, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.015043078921735287, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013167372904717922, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.011966045014560223, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.011669664643704891, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.007564924191683531, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006297037936747074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006154115777462721, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.0058068991638720036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005624586250633001, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0040784054435789585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0041508786380290985, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003713962621986866, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0029058794025331736, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013167372904717922, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013167372904717922, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.03356669470667839, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.03118089586496353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.028491690754890442, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.025706026703119278, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.015280885621905327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.013542607426643372, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.020533554255962372, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.018714463338255882, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.01556408405303955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013772170059382915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.013424243777990341, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.010296844877302647, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.008891318924725056, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.007412979379296303, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0070037092082202435, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.00521080894395709, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.003937480039894581, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0037280044052749872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0036508790217339993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0034048212692141533, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0028422523755580187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.002778070978820324, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0023854458704590797, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0019109027925878763, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013772170059382915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013772170059382915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.21872679889202118, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.20509879291057587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20062778890132904, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18170730769634247, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10192959755659103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09693223237991333, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11409806460142136, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10512497276067734, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10343712568283081, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09164818376302719, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08698992431163788, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05772412568330765, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05009548366069794, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04865168035030365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04827762395143509, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02879180759191513, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024620626121759415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0245185699313879, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022495372220873833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.022279977798461914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01474485918879509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014325087890028954, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01415437925606966, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008650624193251133, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01474485918879509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01474485918879509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1375434696674347, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10847056657075882, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09843247383832932, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07817117869853973, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06184270605444908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05072138085961342, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07446181774139404, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06811672449111938, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0648086741566658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.042879361659288406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.040687061846256256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0379861555993557, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.032626308500766754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.029775023460388184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.029076213017106056, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.019020259380340576, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015644419938325882, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.0154283307492733, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012021537870168686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.011497100815176964, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.010125318542122841, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.010239644907414913, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009126240387558937, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.007118185516446829, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012021537870168686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012021537870168686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18981750309467316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1794993281364441, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17634785175323486, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16106027364730835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08884670585393906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08519567549228668, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09818405658006668, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09094174206256866, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08993276208639145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08085817843675613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07722613215446472, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04989344999194145, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04344017058610916, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04249521717429161, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04227566346526146, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024924583733081818, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02186940796673298, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021807575598359108, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020262030884623528, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020120935514569283, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013075251132249832, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01319551095366478, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012744307518005371, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008801206015050411, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013075251132249832, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013075251132249832, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2370220571756363, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2242433726787567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.22034288942813873, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.20128051936626434, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.11084651201963425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10633940249681473, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12252578884363174, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11342538893222809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11219180375337601, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.10092591494321823, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09637279063463211, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0622534342110157, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.054111357778310776, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052946388721466064, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.052670639008283615, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.031073391437530518, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02700962871313095, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026931699365377426, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024977318942546844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024804409593343735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01619785465300083, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015927189961075783, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015782644972205162, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010136831551790237, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010136831551790237, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010136831551790237, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.65.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.20358313620090485, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.19099244475364685, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18449877202510834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1689528077840805, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0924593061208725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08750256150960922, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10712842643260956, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09898144006729126, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09395256638526917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08475218713283539, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.0814630389213562, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05388716235756874, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04684527963399887, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.044007983058691025, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0433206669986248, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02686193771660328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022514333948493004, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02220837026834488, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.02099027670919895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020565498620271683, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014004860073328018, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013739344663918018, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013076535426080227, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008726890198886395, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014004860073328018, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014004860073328018, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.05958864092826843, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05577067658305168, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.053279124200344086, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.04824663698673248, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02761099673807621, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.025646546855568886, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03297213092446327, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03052840754389763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.02805911935865879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.024896763265132904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.023810693994164467, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.016668902710080147, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014607613906264305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013322549872100353, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.013006545603275299, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.008385119028389454, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007026112638413906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.006874732673168182, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006497634574770927, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.0063059390522539616, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004516832530498505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004646932706236839, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004123780410736799, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003285713028162718, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014607613906264305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.014607613906264305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.041513826698064804, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.03886706382036209, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.03585860878229141, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.03245324268937111, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.01903381384909153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.017048204317688942, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.025050295516848564, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02288089133799076, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.019321730360388756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.017227569594979286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.016751879826188087, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01260251086205244, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.010884344577789307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.009195039048790932, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.00874585472047329, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.006329940166324377, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0048452261835336685, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.004611602518707514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.004495895467698574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.004221535753458738, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0033925375901162624, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.003340347670018673, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0028559945058077574, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0022605974227190018, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01260251086205244, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01260251086205244, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2257847785949707, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.21201293170452118, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20761460065841675, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1883108913898468, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10521811991930008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.10019221901893616, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11779844760894775, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10851749777793884, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10673152655363083, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09480930119752884, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.09010021388530731, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05955677106976509, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05167727544903755, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0501735620200634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04981115460395813, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02969905361533165, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025341682136058807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.025223098695278168, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.023182595148682594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02295025624334812, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01518345344811678, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014644415117800236, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014571353793144226, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008710311725735664, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014644415117800236, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014644415117800236, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.155923530459404, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0958978533744812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06711553782224655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0710829570889473, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06381677836179733, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03219233453273773, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0892249271273613, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.08185579627752304, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07272905856370926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.042940884828567505, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04466342553496361, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04545076563954353, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.039199262857437134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03108363039791584, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.028878813609480858, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.022834178060293198, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.016894645988941193, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01624770648777485, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013181670568883419, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.011577180586755276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012292223051190376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012569773010909557, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009489718824625015, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008952958509325981, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013181670568883419, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013181670568883419, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18842163681983948, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17817504703998566, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17504163086414337, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1598864644765854, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08821055293083191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0845896527171135, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09752927720546722, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09030058234930038, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08929953724145889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08030450344085693, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07666467875242233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04956322908401489, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04315223917365074, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04220819100737572, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04199118912220001, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02476496249437332, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021751489490270615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02168888784945011, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02015536278486252, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020015621557831764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013014893047511578, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013169585727155209, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012685933150351048, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00883476436138153, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013014893047511578, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013014893047511578, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2347741276025772, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2220669984817505, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21820655465126038, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1992911547422409, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1097857803106308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10530272126197815, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12126697599887848, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11233795434236526, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11112949997186661, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09992685168981552, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09538143128156662, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.061567794531583786, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05357500910758972, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.052420586347579956, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05214298516511917, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030706657096743584, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026700934395194054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02662620134651661, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02467861771583557, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02450711280107498, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015905113890767097, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015687616541981697, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015495969913899899, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009902508929371834, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009902508929371834, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009902508929371834, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.66.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.18441371619701385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1727568358182907, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.16671381890773773, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.15266314148902893, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0837889090180397, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0791548490524292, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09713976085186005, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08991744369268417, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08520976454019547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07678766548633575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07373635470867157, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04886447265744209, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04270104691386223, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03999986872076988, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0393424928188324, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024418139830231667, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02066647820174694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020374808460474014, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.019299082458019257, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01889372430741787, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012792086228728294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012917561456561089, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01192468125373125, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00858699344098568, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012792086228728294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012792086228728294, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.049336910247802734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.04629046097397804, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04293129965662956, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.038922086358070374, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02294880338013172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.020716436207294464, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.029341094195842743, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.027219004929065704, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.023332707583904266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02076885849237442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.020046113058924675, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.014922103844583035, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.013090578839182854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.011211356148123741, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01071909163147211, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0076211742125451565, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.006110811606049538, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.005863269325345755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.005703888833522797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.005414480809122324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0043159290216863155, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004399850033223629, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.003804057603701949, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0032768144737929106, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.014922103844583035, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.014922103844583035, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.032342612743377686, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.030194025486707687, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.027115458622574806, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.02442937344312668, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.014781702309846878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.012903130613267422, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0202510766685009, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.018707403913140297, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.01503917295485735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.013334114104509354, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.012946032918989658, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01019278634339571, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.008894720114767551, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.007181180641055107, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.006705909967422485, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.005173513665795326, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0038331609684973955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.003592290449887514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0035570738837122917, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.003276319010183215, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0028703222051262856, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0027683675289154053, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0023825836833566427, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.001910019083879888, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.014781702309846878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.014781702309846878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2183791846036911, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2041759341955185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19941847026348114, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17965441942214966, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10162407159805298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09634442627429962, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11454857140779495, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10525244474411011, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10323985666036606, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09094610810279846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08629160374403, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.058086708188056946, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.05020048841834068, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.048560064285993576, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04819655418395996, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0289900042116642, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024636201560497284, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02450217492878437, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02242480218410492, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02217179164290428, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01486919540911913, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014451747760176659, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014183448627591133, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008847491815686226, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01486919540911913, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.01486919540911913, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.057586945593357086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0442192517220974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.03595907241106033, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.03145310655236244, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.02524028718471527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0183376707136631, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03704816475510597, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.032970625907182693, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.027015909552574158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.019571013748645782, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.01919325813651085, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.019187863916158676, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01652640290558338, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013185296207666397, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.012295633554458618, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010246175341308117, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008468909189105034, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008129622787237167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007677488960325718, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007207861635833979, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006629466079175472, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.007303810678422451, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005796852987259626, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.006387459114193916, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013185296207666397, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.013185296207666397, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1884709596633911, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17820462584495544, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17505300045013428, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1598302721977234, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08827419579029083, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08463837951421738, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.0975395143032074, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09036387503147125, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08935617655515671, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08032107353210449, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07670460641384125, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.049579035490751266, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04318670928478241, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04224732518196106, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.042024821043014526, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024769440293312073, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021786989644169807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021726202219724655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02018655091524124, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020049646496772766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013015475124120712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013212834484875202, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012687566690146923, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008892460726201534, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013015475124120712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013015475124120712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23402608931064606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2213335931301117, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21748663485050201, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19859780371189117, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10949856787919998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10501538217067719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.12098448723554611, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11205320805311203, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.11084427684545517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09965809434652328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09516273438930511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06147008016705513, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05346325784921646, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05230952054262161, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05203312262892723, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030664462596178055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02670050598680973, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026627542451024055, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024688001722097397, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024515166878700256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015955854207277298, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015772033482789993, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015549249947071075, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010079342871904373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010079342871904373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010079342871904373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.67.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1994304060935974, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18667179346084595, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18010519444942474, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16492389142513275, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09045963734388351, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08548467606306076, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10537326335906982, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09716051071882248, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09205137938261032, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08287573605775833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07967925816774368, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.052919480949640274, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0460074245929718, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04311540350317955, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.042413175106048584, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026502978056669235, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02214675396680832, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021836232393980026, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020652659237384796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020219139754772186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014033064246177673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01365907583385706, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013116414658725262, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00883882399648428, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014033064246177673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014033064246177673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07553796470165253, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07080371677875519, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06849509477615356, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.062063850462436676, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.035084642469882965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03302830457687378, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.040450822561979294, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0374261848628521, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03560858592391014, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03162546455860138, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030158260837197304, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02047698199748993, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01788199506700039, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016846852377057076, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01659521460533142, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010260102339088917, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008778071962296963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008668534457683563, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00809384509921074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007937692105770111, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005456155631691217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005568158347159624, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005122687201946974, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003837897442281246, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010260102339088917, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010260102339088917, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06393112242221832, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.059836987406015396, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05725230649113655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05173873528838158, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02937505394220352, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.027325471863150597, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03525207191705704, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.032220762223005295, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.029792601242661476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026434533298015594, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.025405000895261765, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01775379292666912, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015340005047619343, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014076558873057365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013760725036263466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008880727924406528, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0072726840153336525, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0071219392120838165, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006692252121865749, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0065005021169781685, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004685276187956333, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004609323106706142, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004265953786671162, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003037482500076294, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014076558873057365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014076558873057365, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22776556015014648, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.21383361518383026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20947293937206268, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18968673050403595, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10586052387952805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.1007734015583992, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.1178910955786705, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10885050147771835, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.107240229845047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09530627727508545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.09051614999771118, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.059676643460989, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.051865316927433014, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.050465989857912064, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.050147853791713715, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.029762599617242813, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025594977661967278, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02549467235803604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02343715913593769, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02323347143828869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.015285089612007141, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014952895231544971, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014737899415194988, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.009187725372612476, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014952895231544971, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014737899415194988, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1809881627559662, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10987652838230133, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07940560579299927, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07916352897882462, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07374598830938339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.039062533527612686, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.09977875649929047, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09132752567529678, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08377199620008469, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04655608907341957, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.048808541148900986, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.050496358424425125, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.043504029512405396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.035668402910232544, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03359156474471092, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.025367092341184616, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019056709483265877, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01853121817111969, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014120123349130154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.01250070333480835, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01372755877673626, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.01361637283116579, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0110579002648592, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00943770818412304, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014120123349130154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014120123349130154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18544843792915344, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1752963811159134, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17218290269374847, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15721064805984497, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08684554696083069, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08326709270477295, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09607643634080887, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08890621364116669, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08792608976364136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07902148365974426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07549986243247986, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04886684566736221, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04250649735331535, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04157894104719162, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.041361309587955475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024414947256445885, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021459922194480896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021401003003120422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01988832652568817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.0197499580681324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012865113094449043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013042441569268703, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012541752308607101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008809716440737247, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012865113094449043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012865113094449043, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23087577521800995, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.2183028906583786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21451249718666077, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19588497281074524, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10802072286605835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10361703485250473, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1194581612944603, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11056559532880783, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10936164855957031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09832409769296646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09387265145778656, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06073292717337608, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05277498438954353, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05163288116455078, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.05136249214410782, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.030367013067007065, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026381047442555428, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026308322325348854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024397866800427437, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02422953024506569, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015941258519887924, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015623302198946476, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015548747032880783, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010036191903054714, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010036191903054714, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010036191903054714, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.68.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.2019333690404892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18856744468212128, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.18141837418079376, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1660727560520172, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.09158984571695328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08622873574495316, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10766004025936127, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09906895458698273, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09333101660013199, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08377272635698318, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.08060117065906525, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05413752421736717, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.046948421746492386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.043710097670555115, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04292198270559311, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02708391658961773, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02255062758922577, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02220185473561287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.021020540967583656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.020534364506602287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014329551719129086, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014096537604928017, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.013276729732751846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009272824972867966, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014329551719129086, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.014329551719129086, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08549109846353531, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.08027333766222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07791454344987869, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07078336179256439, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03979715704917908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03759989142417908, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04587288200855255, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.042145125567913055, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.04035983234643936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03597673773765564, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03443247079849243, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.023403078317642212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.020255086943507195, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.019219210371375084, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01897422969341278, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011847158893942833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.010060720145702362, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009953535161912441, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009313725866377354, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.009162478148937225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.00631196703761816, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006348242051899433, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005987237673252821, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004370520822703838, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011847158893942833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011847158893942833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07275819778442383, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06816871464252472, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06564792990684509, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.0594116672873497, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03351542726159096, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03139171749353409, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03948533162474632, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03629310056567192, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.034019868820905685, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.030248135328292847, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.028923621401190758, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019902659580111504, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017278680577874184, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01605123095214367, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015751434490084648, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009963980875909328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00825829990208149, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008117140270769596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007594874128699303, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007409708108752966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005219101905822754, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005139896180480719, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004812633153051138, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003333830274641514, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009963980875909328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009963980875909328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2148887664079666, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.2019921839237213, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19792962074279785, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1794493943452835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09986089915037155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0952148362994194, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11127560585737228, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10283299535512924, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10129665583372116, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09011215716600418, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08561018109321594, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05629558861255646, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04893367737531662, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04762504994869232, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04731123894453049, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02808297798037529, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.024086741730570793, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023971030488610268, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.022043559700250626, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021853793412446976, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014366245828568935, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01392702478915453, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013850521296262741, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008351016789674759, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014366245828568935, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014366245828568935, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.1774759292602539, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11213383078575134, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08254425972700119, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08123329281806946, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07537239789962769, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04535367339849472, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.10004641860723495, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.09187120944261551, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.08232954889535904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04741331934928894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.049100324511528015, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.05078188329935074, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.043761685490608215, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.036351729184389114, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.034392379224300385, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02536926046013832, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.0192110538482666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.018554288893938065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014028655365109444, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012471850961446762, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01337654609233141, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013413920998573303, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010773140005767345, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009057668969035149, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014028655365109444, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014028655365109444, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1884380578994751, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17813853919506073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17497064173221588, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1597234308719635, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0882396474480629, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08460090309381485, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09755269438028336, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0903487503528595, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08932454138994217, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08028135448694229, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0766478031873703, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04960663616657257, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.043172601610422134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0422331877052784, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04201115667819977, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02479395642876625, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021767469123005867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02170729450881481, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020167572423815727, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020028501749038696, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013059988617897034, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013188283890485764, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012735018506646156, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008854069747030735, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013059988617897034, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013059988617897034, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.23199495673179626, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21936872601509094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.21555840969085693, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.196788489818573, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10858799517154694, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10412077605724335, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11998994648456573, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.11112063378095627, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10991345345973969, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09881066530942917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09428782761096954, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.06099279597401619, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.053008951246738434, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.051867853850126266, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.051596030592918396, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03041670098900795, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.026472680270671844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026397772133350372, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024474376812577248, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.024303220212459564, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015829550102353096, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015635913237929344, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015424343757331371, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009984324686229229, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009984324686229229, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009984324686229229, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.69.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19608741998672485, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18287114799022675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1757945567369461, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1607927829027176, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08897499740123749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08363187313079834, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1040746197104454, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09633896499872208, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09073541313409805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08129440248012543, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07808457314968109, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05247798562049866, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04584742709994316, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.042602889239788055, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04181841388344765, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02622034400701523, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022228743880987167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.02188151702284813, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020745491608977318, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0202600359916687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013796279206871986, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.014234419912099838, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.01273266039788723, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009781102649867535, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013796279206871986, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013796279206871986, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07337788492441177, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.06866780668497086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06627894937992096, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.060025133192539215, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03402366116642952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03193984925746918, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03984125703573227, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.036578428000211716, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03460139036178589, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.030684616416692734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02936999499797821, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02020397037267685, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017491206526756287, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016348931938409805, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01606823317706585, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010130764916539192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00851782038807869, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008395298384130001, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007857088930904865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007683134637773037, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005389819387346506, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005425741430372, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.0050100237131118774, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003721562447026372, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010130764916539192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010130764916539192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06356240063905716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.05942544713616371, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.056936852633953094, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05149944871664047, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.029255805537104607, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.027253447100520134, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03475847467780113, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03210160881280899, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.029735533520579338, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.026343677192926407, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02516879327595234, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.01752244122326374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015273760072886944, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014039263129234314, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.013728514313697815, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.0087796151638031, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007245765533298254, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007098413072526455, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006658914498984814, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006468833424150944, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004631211049854755, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004574976861476898, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004232619423419237, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002995838178321719, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014039263129234314, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014039263129234314, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2082691639661789, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19574680924415588, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19148382544517517, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17365762591362, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0967242643237114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09220997989177704, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10800693184137344, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09974443912506104, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09805572777986526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08733435720205307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0830153226852417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05464312806725502, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0475078821182251, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04612429067492485, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04580371826887131, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027237867936491966, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.023338768631219864, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023228662088513374, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021383419632911682, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021179908886551857, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013965035788714886, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01355529110878706, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013439320959150791, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008174151182174683, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013965035788714886, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013965035788714886, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.12735401093959808, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09519794583320618, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08349769562482834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0652150958776474, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05465128272771835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0417574904859066, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.06859087198972702, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06249966099858284, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.05861295014619827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03695782274007797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03515705093741417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03489609807729721, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.029899172484874725, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.026443500071763992, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02557859756052494, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.017567511647939682, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014278178103268147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.014032351784408092, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.011113305576145649, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.010484200902283192, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.00968102551996708, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.009982699528336525, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.008512127213180065, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.007406368851661682, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014278178103268147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.014278178103268147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18895241618156433, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1786099225282669, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17544996738433838, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16014665365219116, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08849732577800751, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0848485678434372, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09786614775657654, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09059727191925049, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08959774672985077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0805152878165245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07686521112918854, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.049735330045223236, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04329412430524826, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04234541207551956, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04212299361824989, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02484961971640587, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021806253120303154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021745482459664345, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020194469019770622, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02005525305867195, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013042779639363289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013181185349822044, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012710889801383018, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008816981688141823, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013042779639363289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013042779639363289, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22977741062641144, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21722593903541565, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.2134346067905426, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.19479112327098846, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10752826184034348, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10310709476470947, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11881875991821289, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.1100434884428978, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10884829610586166, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0978391095995903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0933765098452568, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.060390714555978775, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05250689014792442, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.05137123912572861, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.051099687814712524, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.03013901226222515, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02623913437128067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.026168230921030045, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.024258242920041084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.02408638224005699, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015710826963186264, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.015526174567639828, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.015312207862734795, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009952882304787636, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009952882304787636, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009952882304787636, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.70.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1973261535167694, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.18313509225845337, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17555111646652222, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.16049525141716003, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08919833600521088, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08354519307613373, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.10553065687417984, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09726779162883759, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09124822914600372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0813162624835968, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07818234711885452, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05309231951832771, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04613543301820755, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.042637623846530914, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.041780486702919006, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.026522278785705566, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.022090082988142967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021723436191678047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020551862195134163, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.02002808079123497, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013999302871525288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013991925865411758, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012861750088632107, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009341463446617126, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013999302871525288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013999302871525288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.0754622295498848, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07069879025220871, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06827032566070557, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.061861734837293625, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03502104431390762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03289621323347092, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.040536291897296906, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03759048506617546, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.035554125905036926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03157232701778412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030115969479084015, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.020512627437710762, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017958471551537514, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016817227005958557, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016541259363293648, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010276252403855324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008759580552577972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008634217083454132, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008075005374848843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007904532365500927, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0054568140767514706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.0055679865181446075, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005101093556731939, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0038179506082087755, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010276252403855324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010276252403855324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06915277242660522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0647440254688263, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06215975433588028, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.056235432624816895, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.031926658004522324, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.029765108600258827, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03772062435746193, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.034814514219760895, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.032407067716121674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.028780542314052582, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02752798981964588, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019045643508434296, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.016608554869890213, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015285419300198555, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014975868165493011, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009535908699035645, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007892947643995285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.0077363369055092335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007265132386237383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007059567142277956, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005032616201788187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004970462527126074, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004617215134203434, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003243515267968178, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009535908699035645, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009535908699035645, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22478222846984863, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.21154651045799255, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20721907913684845, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18790598213672638, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10481520742177963, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0999346449971199, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11685506999492645, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10787405073642731, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.10622339695692062, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09462440013885498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.09002754092216492, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.059227354824543, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.051392409950494766, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0499960221350193, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04965957999229431, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.029520178213715553, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025308534502983093, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.025204921141266823, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02318907342851162, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02298765629529953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.015129365026950836, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014668093994259834, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014586006291210651, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008848040364682674, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014668093994259834, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.014668093994259834, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13607676327228546, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10249494016170502, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.08440322428941727, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07238686084747314, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05863093212246895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.040068015456199646, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0819009393453598, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07479359209537506, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06384754180908203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.043371621519327164, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04225081205368042, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.0417327843606472, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03597052022814751, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.028827903792262077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.026894690468907356, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.02104143425822258, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.016021141782402992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015303408727049828, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013350292108952999, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012083671055734158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011644970625638962, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012186897918581963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.00935534480959177, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00915595144033432, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013350292108952999, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013350292108952999, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1843681037425995, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17426034808158875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1711578518152237, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15617217123508453, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08634810149669647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08277487009763718, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09542083740234375, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08840582519769669, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08741768449544907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0785495936870575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07493390142917633, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.048469483852386475, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04222758114337921, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.041301876306533813, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.041080813854932785, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024205081164836884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.021236639469861984, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02117607183754444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019657909870147705, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019519999623298645, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012654769234359264, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012785365805029869, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012327334843575954, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008487503044307232, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012654769234359264, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012654769234359264, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22309860587120056, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21092961728572845, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20722585916519165, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18911728262901306, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10439819097518921, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.10009986162185669, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.1153385117650032, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10685380548238754, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10568466037511826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09497823566198349, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.09062474220991135, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05858900770545006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.05096393823623657, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.049857769161462784, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04959902912378311, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02921604923903942, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02541820891201496, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02534479647874832, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.023483481258153915, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023318525403738022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.015159066766500473, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014968588016927242, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014764243736863136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009492498822510242, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014764243736863136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014764243736863136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.71.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.19507762789726257, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1804313212633133, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.17275026440620422, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.1578688770532608, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08807637542486191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.08224757760763168, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.1042933315038681, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09624642878770828, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.09024182707071304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.08011501282453537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07705938816070557, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.05254903808236122, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.045641522854566574, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.04207038879394531, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.04120177775621414, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02620418183505535, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02173147164285183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.021365482360124588, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.020170096307992935, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0196307934820652, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013736763969063759, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013697258196771145, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.012566426768898964, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009012877941131592, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013736763969063759, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013736763969063759, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08464901894330978, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0794156938791275, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07694939523935318, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06982630491256714, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.039324939250946045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03709126263856888, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04512777179479599, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.041826117783784866, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0399041473865509, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03550777584314346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.033864036202430725, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.022855186834931374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01997295953333378, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.018867330625653267, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01860078237950802, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011440497823059559, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.00978427566587925, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00966718327254057, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009023468941450119, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008856390602886677, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006051310803741217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.006135469302535057, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005705062299966812, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004153894726186991, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011440497823059559, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011440497823059559, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07418061792850494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06953069567680359, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06689385324716568, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06047811359167099, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03414444252848625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03196931257843971, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.040142882615327835, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03704167902469635, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03467091545462608, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03080921806395054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02943606860935688, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.020215777680277824, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017643297091126442, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01635531708598137, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016044722869992256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010123131796717644, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00841304287314415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008258482441306114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.0077372100204229355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007532757241278887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005316842347383499, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005240900442004204, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004906144924461842, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003378773108124733, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010123131796717644, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.010123131796717644, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.22563768923282623, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.21226176619529724, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.20807510614395142, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.18897269666194916, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.10504402220249176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.10025711357593536, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.11710148304700851, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10797449946403503, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.1064002737402916, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.09493336826562881, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.09033924341201782, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05921950936317444, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.051397617906332016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.05003926157951355, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.049740035086870193, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02952941693365574, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.025306474417448044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.025209682062268257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.023216448724269867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.023017629981040955, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.015114989131689072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0146228251978755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.014582346193492413, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008772864006459713, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0146228251978755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0146228251978755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15603601932525635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1160806193947792, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10113013535737991, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08411303162574768, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0654868483543396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05088852718472481, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08471079915761948, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07795511186122894, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07308763265609741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.048908572643995285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04494023695588112, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04332166537642479, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03743232414126396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03184012323617935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.030387260019779205, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021737292408943176, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01722106523811817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.016893606632947922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01418941468000412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013202020898461342, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01184163335710764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012292164377868176, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009941649623215199, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008962447755038738, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01418941468000412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.01418941468000412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.18373890221118927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.17366667091846466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.17056210339069366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1556982696056366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0860726609826088, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08254007250070572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09524021297693253, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08814311772584915, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08714954555034637, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07828991860151291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07474192976951599, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.048432789742946625, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04213089868426323, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0412064827978611, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04098581522703171, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.024223849177360535, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02126050367951393, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021198710426688194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019697360694408417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019562002271413803, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279523503035307, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01290720235556364, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012474608607590199, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008703196421265602, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279523503035307, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01279523503035307, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.22226764261722565, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.21012791991233826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.20646372437477112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18846073746681213, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.1040332168340683, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09972640872001648, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11491625010967255, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10648444294929504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10531675815582275, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0946214497089386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0902547836303711, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.058385737240314484, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.050789207220077515, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0496903732419014, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04942840710282326, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.029118835926055908, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02535722404718399, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02528567798435688, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02343173138797283, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.023269295692443848, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01512503158301115, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01496704202145338, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0147321205586195, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009544258937239647, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0147321205586195, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.0147321205586195, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.72.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1818990409374237, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16758926212787628, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.15981946885585785, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.14587372541427612, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08221343904733658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07626291364431381, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09844847768545151, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09063424170017242, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08435742557048798, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.0744660273194313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07161013036966324, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.049648940563201904, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.043082285672426224, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03936110436916351, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.038447435945272446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024800702929496765, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.02047048881649971, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020077258348464966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01896200142800808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.01839534193277359, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013118893839418888, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013145600445568562, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011908923275768757, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008843917399644852, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013118893839418888, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013118893839418888, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08338990807533264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0783400684595108, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07610756158828735, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06902360171079636, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.038754235953092575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03664816915988922, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.044417500495910645, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.040961917489767075, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03930339217185974, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03501276671886444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.033458903431892395, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.022474590688943863, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.019557135179638863, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01857736147940159, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01834402233362198, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011272801086306572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009602622129023075, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009499273262917995, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008851603604853153, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008703513070940971, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005952435079962015, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005942853633314371, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005639996379613876, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003961315844208002, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011272801086306572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011272801086306572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07320579886436462, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0686446949839592, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06630250811576843, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.06002652272582054, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.033783696591854095, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.031743913888931274, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03940481320023537, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03628452122211456, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03424637019634247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03046724945306778, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029145922511816025, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019875062629580498, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.017265120521187782, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.0161579679697752, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015882009640336037, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009939446114003658, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.0082998713478446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008169669657945633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007634938694536686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007466036360710859, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005203631240874529, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.0051225461065769196, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004837274085730314, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003310458967462182, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009939446114003658, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009939446114003658, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.2103077471256256, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19783852994441986, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19396330416202545, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17589129507541656, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09784740209579468, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09342429786920547, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10897532105445862, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.10057088732719421, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09918372333049774, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08842716366052628, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08412671089172363, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.05512877181172371, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04788802191615105, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.046655453741550446, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04636039584875107, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027485480532050133, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02357311174273491, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.023483658209443092, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.021611491218209267, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021425481885671616, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014070820063352585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013617569580674171, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013574168086051941, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008149947971105576, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014070820063352585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.014070820063352585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.15715213119983673, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.11484505981206894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.10068435966968536, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.08761879801750183, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.06754810363054276, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.05086687207221985, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08345555514097214, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07646075636148453, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07283404469490051, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04813526198267937, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04560441896319389, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.042518507689237595, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03673699498176575, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.03276601806282997, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03177769482135773, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.021503128111362457, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.017751110717654228, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.017522618174552917, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014240490272641182, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013551377691328526, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.01200642716139555, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012408594600856304, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.01070189755409956, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.00929377693682909, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014240490272641182, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014240490272641182, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17955638468265533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1696838140487671, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16667987406253815, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1521070897579193, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08408753573894501, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08060946315526962, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09306427091360092, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0861126109957695, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08514183759689331, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0765036791563034, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07302697002887726, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.047307126224040985, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04115184023976326, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04024532809853554, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0400349386036396, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.023652415722608566, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02073987014591694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02068212628364563, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019208494573831558, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019078515470027924, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012461778707802296, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.01255680713802576, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012141097337007523, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.00842632632702589, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012461778707802296, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012461778707802296, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.2147001326084137, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.20298315584659576, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1994098424911499, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.18198280036449432, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.10048795491456985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09633021056652069, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.11101476103067398, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.10285355150699615, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.10173333436250687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.09141071885824203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08723076432943344, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.0563947856426239, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04907320439815521, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04800102114677429, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04774795100092888, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.0281468965113163, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.024514099583029747, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02444513514637947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.022657545283436775, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.022498639300465584, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014645121060311794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014496310614049435, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.014265389181673527, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009276424534618855, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014645121060311794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014645121060311794, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.73.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.17529356479644775, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.16045594215393066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1526070386171341, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.13927480578422546, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07902203500270844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07293997704982758, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.09499580413103104, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08749859780073166, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08133956044912338, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07131437957286835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06852250546216965, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.047807980328798294, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.0416293628513813, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.037908922880887985, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.036988433450460434, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.023936370387673378, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.019834883511066437, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01944810152053833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01834130845963955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017771441489458084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012735879048705101, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012916207313537598, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011532291769981384, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.008862901479005814, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012735879048705101, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012735879048705101, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07852990925312042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0736284926533699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0712718516588211, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0645657405257225, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03640732914209366, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.034279875457286835, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.042247842997312546, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03881894797086716, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.036938462406396866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.032847076654434204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03135237842798233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02143273502588272, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.018575122579932213, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01748574897646904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01722235418856144, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010750451125204563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009124867618083954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.009003516286611557, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008422669023275375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008258542977273464, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005719941575080156, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005808364134281874, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005338342394679785, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.004018327686935663, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010750451125204563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010750451125204563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06919249147176743, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06482431292533875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06246786192059517, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05646971985697746, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03181389346718788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.029845833778381348, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0371943935751915, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03439652919769287, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03230481967329979, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.028706271201372147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.027393678203225136, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018756141886115074, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0163652952760458, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015238611958920956, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014954036101698875, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009379517287015915, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007848935201764107, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007721263449639082, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007218803279101849, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007049229461699724, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004927701782435179, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004894760902971029, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004561680369079113, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003196578472852707, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014954036101698875, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014954036101698875, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19055776298046112, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.179317906498909, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1755911409854889, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.1593538522720337, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08852960914373398, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.0844452828168869, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0989069789648056, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09127095341682434, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08973997086286545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07993893325328827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0760606974363327, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.050025101751089096, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0434618778526783, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04221469908952713, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04191486909985542, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.024950172752141953, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.021357044577598572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02126421593129635, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.019597288221120834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.019399898126721382, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012786428444087505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.012410767376422882, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012297123670578003, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007478178013116121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012786428444087505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012786428444087505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13140197098255157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.10257919132709503, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09310849010944366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0749049037694931, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05740942060947418, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0465683713555336, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.06983029842376709, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06358757615089417, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.061072301119565964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04121573641896248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03806344419717789, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03583379089832306, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.03081747144460678, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.028103986755013466, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.02743545174598694, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.018180960789322853, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.015580719336867332, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015423418954014778, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012751061469316483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012305574491620064, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0103945042937994, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.011171176098287106, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009508948773145676, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.008842814713716507, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012751061469316483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.012751061469316483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1736585944890976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16417285799980164, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.16126272082328796, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.14722032845020294, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08138543367385864, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07803232222795486, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09009969234466553, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08332911133766174, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.08238020539283752, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07404085248708725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07076926529407501, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04594501480460167, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03988045081496239, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0390067882835865, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03880240023136139, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02295876480638981, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02019677311182022, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.020139547064900398, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01873082108795643, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.018601909279823303, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012209934182465076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.012372707948088646, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.011907167732715607, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008485540747642517, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012209934182465076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012209934182465076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.20799323916435242, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.19666971266269684, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.19323129951953888, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.17640140652656555, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.09739843755960464, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.09338561445474625, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10761774331331253, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09971056133508682, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09861738234758377, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08861790597438812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.08452946692705154, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.05469817295670509, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.04758928716182709, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.046551018953323364, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.046306051313877106, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02730534039437771, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.023845288902521133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.02377893775701523, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.02206151932477951, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.021905919536948204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014261972159147263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.014217793010175228, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013894288800656796, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.009264852851629257, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014261972159147263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.014261972159147263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.74.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1816689670085907, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1654767096042633, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1574927419424057, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.143466517329216, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.08170648664236069, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.07530110329389572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.0979020819067955, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.09004788845777512, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.08416163921356201, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.07335419207811356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.07046730816364288, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04941290616989136, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.04282565414905548, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03915241360664368, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03824504092335701, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.024703241884708405, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.020426522940397263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.020059263333678246, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01880582794547081, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.018245777115225792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013100777752697468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.013199079781770706, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011893668211996555, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.00899820402264595, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013100777752697468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.013100777752697468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07675276696681976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.0720614343881607, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0698317214846611, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06323028355836868, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03564091771841049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03361724689602852, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.041055481880903244, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.037853993475437164, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.036152202636003494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03213460370898247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030673358589410782, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.020804252475500107, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01808890327811241, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.017098620533943176, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016857080161571503, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01042840164154768, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008875083178281784, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008772049099206924, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008174820803105831, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008027096278965473, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005530260968953371, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005573182832449675, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005213660188019276, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0037946654483675957, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01042840164154768, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.01042840164154768, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.0709996148943901, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06654926389455795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06412933021783829, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.05794635787606239, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03266042098402977, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.030633237212896347, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.038143329322338104, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.035247739404439926, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03314594179391861, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02944078855216503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.028072835877537727, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.019222166389226913, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.016795476898550987, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.015644587576389313, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.015362751670181751, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009629595093429089, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.00805703829973936, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.00792418047785759, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.00740996515378356, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007234583143144846, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005072268191725016, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005024524871259928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004705253057181835, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003285917453467846, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009629595093429089, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009629595093429089, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19823406636714935, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.1865750253200531, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18282319605350494, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.16586518287658691, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09196604043245316, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08780650794506073, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10247795283794403, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09459144622087479, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09322429448366165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08318065851926804, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.0791412815451622, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.051879096776247025, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.045053113251924515, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04386579245328903, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.0435715951025486, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.025875555351376534, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02226828597486019, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.022178249433636665, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020450623705983162, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.020270489156246185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013305163942277431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01300962083041668, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01285110879689455, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008004535920917988, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013305163942277431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013305163942277431, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13856907188892365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.110798679292202, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.09742417931556702, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07555585354566574, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05990179628133774, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04780685156583786, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.08053124696016312, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07319167256355286, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06461022049188614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.045737914741039276, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.041711729019880295, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04131758213043213, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.035273972898721695, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.029340917244553566, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.027787255123257637, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.0209177415817976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.016221024096012115, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015662677586078644, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013712828047573566, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012704579159617424, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011768152005970478, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012056732550263405, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.009920952841639519, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009126948192715645, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013712828047573566, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013712828047573566, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.16151472926139832, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.1526397466659546, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1499367356300354, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.13685329258441925, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0756077989935875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0724758505821228, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08356843143701553, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07742361724376678, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07654217630624771, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06877212226390839, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0656374841928482, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.042501553893089294, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03700371831655502, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03618289902806282, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.035989128053188324, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.021227315068244934, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.018640968948602676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.01858431100845337, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.017263878136873245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01714169792830944, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01113720703870058, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011280057951807976, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010843733325600624, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007561163976788521, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01113720703870058, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.01113720703870058, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1975710541009903, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.18681998550891876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1835220903158188, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.16752736270427704, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0924975574016571, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.08868587762117386, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.10229213535785675, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.09469874203205109, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.09363193809986115, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.08413108438253403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0802740678191185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.051992274820804596, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.045219335705041885, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04422151669859886, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04398715868592262, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.025966279208660126, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.022691020742058754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.022626090794801712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.020999105647206306, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.020852109417319298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013601372018456459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013592788018286228, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.013245410285890102, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008928157389163971, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013601372018456459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.013601372018456459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.75.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.1610669195652008, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.1455722451210022, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.1377030909061432, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.12556509673595428, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.07249600440263748, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.0661311075091362, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08804142475128174, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.08085019886493683, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.07494867593050003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.06480643898248672, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.06236373260617256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.04453723877668381, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03887240216135979, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03517470881342888, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.03428410738706589, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02249247021973133, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.01912621408700943, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0187261700630188, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.0177146103233099, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.017127258703112602, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012236902490258217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.01333135087043047, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.011001553386449814, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.010055970400571823, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012236902490258217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.012236902490258217, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08448292315006256, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07938097417354584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0770733430981636, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06995464861392975, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03911755979061127, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03698543459177017, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04502999037504196, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04135872796177864, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0396617166697979, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.03537212312221527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.033787354826927185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02280835248529911, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.01973637007176876, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01872849464416504, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.018484866246581078, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011418350040912628, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.009673591703176498, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.00956645142287016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.008921949192881584, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008769862353801727, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.006012352183461189, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005991301499307156, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005657678935676813, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.003993453923612833, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011418350040912628, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.011418350040912628, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.07563607394695282, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.0710015594959259, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06850926578044891, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.062002431601285934, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.034728117287158966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.03260837867856026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.040402282029390335, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.037452440708875656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.03522714599967003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.03138481825590134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.029931122437119484, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.020306222140789032, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01778901368379593, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.01659335382282734, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.016297584399580956, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01015721820294857, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.008487895131111145, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.008345233276486397, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007816781289875507, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.007628308609127998, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.005299130920320749, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.005214308388531208, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004914845805615187, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.003296289360150695, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01015721820294857, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.01015721820294857, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20354606211185455, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19166232645511627, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.18803930282592773, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17087820172309875, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09434489905834198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09022428095340729, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10492858290672302, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09688740968704224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.095574289560318, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08543113619089127, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08142352849245071, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.0531063973903656, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.046116046607494354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04498501867055893, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04470495879650116, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02648431807756424, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.022806931287050247, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02273051254451275, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.020978309214115143, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.02081485092639923, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013596729375422001, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01328064501285553, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.01313757710158825, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00815628468990326, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013596729375422001, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013596729375422001, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.17074598371982574, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.1332775056362152, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.12008403241634369, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.0944547951221466, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.07552751153707504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.06177908182144165, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.0923340767621994, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0837225690484047, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.07959648966789246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.05256011337041855, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.048496540635824203, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.047195956110954285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.04028729721903801, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.036618590354919434, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.03571435436606407, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.023786308243870735, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.019778452813625336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.019517075270414352, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.015595665201544762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014953982084989548, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.013129912316799164, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013634619303047657, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.011919943615794182, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010232847183942795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014953982084989548, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.014953982084989548, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.144343301653862, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.13647089898586273, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.13403162360191345, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.12236781418323517, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06763158738613129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.06486306339502335, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.07484634220600128, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0692947655916214, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06847845017910004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06157531961798668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.0588524229824543, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03818424791097641, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03328682854771614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03255524858832359, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03238033875823021, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.019094394519925117, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.017113130539655685, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.017065053805708885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01593301072716713, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.015826955437660217, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010236802510917187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.010866369120776653, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009980570524930954, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007883218117058277, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010236802510917187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.010236802510917187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.17749808728694916, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.16785074770450592, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1648746132850647, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.15060247480869293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.08333974331617355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.0799352377653122, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.09218987077474594, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.08532439917325974, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0843716412782669, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.07590390741825104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.07255662977695465, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04711833596229553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.0411141999065876, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.04021787643432617, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.04001246392726898, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.02357795648276806, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.02135949581861496, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.021299509331583977, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.019921820610761642, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.019799623638391495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012780170887708664, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.013824752531945705, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.012473976239562035, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.010329607874155045, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012780170887708664, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.012780170887708664, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.76.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.14662081003189087, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.13158945739269257, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.12143410742282867, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.11031260341405869, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.06545481085777283, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.05842704325914383, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.08487880229949951, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.0772654339671135, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.06780089437961578, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.05803393945097923, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.05592003092169762, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.041765011847019196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.03638560697436333, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.03176882117986679, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.030640289187431335, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.02103099226951599, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.017297297716140747, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01687435992062092, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.015937859192490578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.015329836867749691, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011679903604090214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.012271451763808727, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.010416677221655846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009337794966995716, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011679903604090214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.011679903604090214, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07590985298156738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07129288464784622, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06914372742176056, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06271447241306305, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.035159993916749954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03320801258087158, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.04054974764585495, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.037282735109329224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.035643622279167175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.0317700169980526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.030415821820497513, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.02054154872894287, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017806466668844223, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.016870148479938507, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016641493886709213, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010300454683601856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008767782710492611, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008667665533721447, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.00809925515204668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.00795702449977398, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.005478397011756897, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.00551938358694315, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005175837315618992, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0037791405338793993, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010300454683601856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010300454683601856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06753470748662949, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06331612914800644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.06108058616518974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.055265627801418304, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.031011713668704033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.02910422533750534, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.0361611433327198, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03345128521323204, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.031450480222702026, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.02798989973962307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02669355273246765, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.018222589045763016, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.01592991314828396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014842984266579151, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.014578952454030514, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.009119675494730473, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007661839481443167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007537097670137882, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.007056016009300947, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006895813159644604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.0048277792520821095, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004786488134413958, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004488561302423477, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0031519627664238214, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014842984266579151, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014842984266579151, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.19447380304336548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.18303970992565155, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.1794501096010208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.162972554564476, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.09009429812431335, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.08609505742788315, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10033858567476273, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09255777299404144, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09122657775878906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08156310766935349, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07761488854885101, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.050740938633680344, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.044042713940143585, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04294055700302124, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.04266167804598808, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.025292573496699333, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02174288220703602, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.021653147414326668, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.01997067965567112, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.01980600506067276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012979220598936081, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.01262882724404335, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.012535354122519493, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.007697254419326782, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012979220598936081, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.012979220598936081, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.13891004025936127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09257543832063675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.07261814177036285, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.06827543675899506, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05903983488678932, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.03616047278046608, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07887479662895203, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.07151568681001663, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.06465835869312286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.039882585406303406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.03980463370680809, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.04029659554362297, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.034523818641901016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.029050033539533615, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.027636757120490074, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.020487863570451736, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01630568876862526, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.015878507867455482, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013053462840616703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.012093285098671913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.011725704185664654, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.012259749695658684, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010019030421972275, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.009570028632879257, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013053462840616703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.013053462840616703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.1317169964313507, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.12456807494163513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.1223277598619461, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.11166905611753464, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.06152324751019478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.05898050218820572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.06821469962596893, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0630524754524231, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.062282729893922806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.056023724377155304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.05353374406695366, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03470514714717865, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.030164605006575584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.0294803474098444, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.029314953833818436, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.017351223155856133, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.015275608748197556, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.015227317810058594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014175529591739178, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.014076058752834797, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009194597601890564, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.009379967115819454, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.008947646245360374, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.006447474472224712, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014175529591739178, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014175529591739178, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.15650080144405365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.14802956581115723, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.14541663229465485, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.13283716142177582, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.07327896356582642, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.07027087360620499, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.08121071755886078, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.07506027072668076, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.07419098913669586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.06671606004238129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.06381486356258392, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.04145926982164383, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.03600155562162399, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.03520505502820015, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.03501581400632858, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.020732102915644646, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.018424030393362045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.018372252583503723, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.017133846879005432, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.01702280528843403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011124820448458195, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.011569146998226643, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.010847801342606544, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.008262105286121368, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011124820448458195, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.011124820448458195, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.77.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.11498413980007172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.10165955126285553, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.09416861087083817, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.08524908125400543, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.05184810608625412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.045892585068941116, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.06709884107112885, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.060150306671857834, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.05393504723906517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.04554685950279236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.04389939829707146, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.03388362005352974, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.02951156347990036, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.02587081864476204, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.0249315332621336, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0176304392516613, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.015100724995136261, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01472367625683546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.01405419409275055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.013528107665479183, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.01054695900529623, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.011689575389027596, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.009467802941799164, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.009733146987855434, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01472367625683546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.01472367625683546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.07468347251415253, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.07023616880178452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.06826311349868774, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.061876941472291946, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.0345340259373188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.0326884500682354, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03952265530824661, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.0364609993994236, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.035007864236831665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.031232925131917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.029839832335710526, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01999509707093239, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.017399702221155167, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01654406450688839, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.01633722335100174, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010017617605626583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.008567300625145435, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.008477547205984592, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.007909278385341167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.007781637366861105, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.0053091165609657764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.005333936307579279, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.005042297299951315, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0036056560929864645, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010017617605626583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.010017617605626583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.06565044820308685, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.06163104623556137, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05961369723081589, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.053972892463207245, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.03017585165798664, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.028401587158441544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03504825755953789, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.03232762590050697, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.030601050704717636, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.027270451188087463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02606540359556675, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.017701530829072, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.015401985496282578, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014443131163716316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.01420604158192873, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.008859612978994846, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.007419563829898834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.007307352032512426, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.006831050850450993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.006683619227260351, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004655550699681044, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004579515196382999, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.004357762634754181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.0029709285590797663, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014443131163716316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.014443131163716316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.20799916982650757, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.19609610736370087, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.19241905212402344, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.17467714846134186, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.0965481624007225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.09233088046312332, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.10748355090618134, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.09911338239908218, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.09776885062456131, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.08747584372758865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.08331750333309174, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.054423946887254715, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.04722081497311592, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.04604429006576538, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.045768868178129196, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.027142470702528954, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.02339966967701912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.02332012727856636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.02152385376393795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.021357031539082527, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013960062526166439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.013677047565579414, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.013501930050551891, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.008502256125211716, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013960062526166439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.013960062526166439, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.12723217904567719, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.09958785772323608, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.0860377624630928, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.07107767462730408, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.05314105376601219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.04119529202580452, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.07545661926269531, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.06748640537261963, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0581037662923336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.04213447868824005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.04009292647242546, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.03784700110554695, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.033045340329408646, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.026790976524353027, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.025133401155471802, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.019775237888097763, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.01606028899550438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.01549123041331768, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014320972375571728, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.013361434452235699, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.012073962949216366, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.013069643639028072, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.010397929698228836, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.010838544927537441, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014320972375571728, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.014320972375571728, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.11157054454088211, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.10553324967622757, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.10363449901342392, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.0946916714310646, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.05228455364704132, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.05015261843800545, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.05792807415127754, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.05356476828455925, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.05291138216853142, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.04765499383211136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.04566831514239311, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.02973790280520916, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.025912586599588394, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.025349723175168037, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.025213103741407394, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014930388890206814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.013666396029293537, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.013628356158733368, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.012793316505849361, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.012714186683297157, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.008281819522380829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.009109004400670528, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.008093489333987236, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.007057312875986099, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014930388890206814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.014930388890206814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.13260199129581451, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.12540820240974426, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.12316256761550903, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.1125401109457016, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.0619472861289978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.05941334739327431, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.06881532073020935, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.0634833499789238, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.06270477175712585, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.0564458966255188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.054055918008089066, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.03516888990998268, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.030436111614108086, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.029750283807516098, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.029588229954242706, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.017690042033791542, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.01557618472725153, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.015529965050518513, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014496946707367897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.014398219995200634, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.009645262733101845, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.009785945527255535, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.009392325766384602, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.006994776427745819, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014496946707367897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.014496946707367897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.78.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.089634969830513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.07891795784235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.07186992466449738, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.06472694873809814, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.0405312180519104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.03506965935230255, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.055465053766965866, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.04868460074067116, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.04230015352368355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.035433556884527206, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.03491879627108574, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.02861875481903553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.023925235494971275, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.020255958661437035, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.019274655729532242, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.014825512655079365, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.011784507893025875, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.011369181796908379, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.010932225733995438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0103793665766716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.008676069788634777, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.009215058758854866, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.007374783977866173, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.007547295652329922, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.014825512655079365, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.014825512655079365, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.self_attn.q_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.06562702357769012, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.061421673744916916, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.059468112885951996, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.05384407192468643, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.030260741710662842, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.028465639799833298, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.03489364683628082, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.03220832347869873, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.03070102073252201, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.027232632040977478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.02595808357000351, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.01766008883714676, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.015387599356472492, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01452977117151022, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.014321486465632915, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.00885959155857563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.007590085733681917, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.007496979087591171, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.006994197610765696, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.006864412222057581, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.004735919181257486, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.004839031957089901, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.004464785102754831, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.0033706834074109793, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01452977117151022, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.01452977117151022, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.self_attn.k_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.056719403713941574, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.053043995052576065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.05128373205661774, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.04631904140114784, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.02602871134877205, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.024467691779136658, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.03012700378894806, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.02783472277224064, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.02643141709268093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.023402610793709755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.02230222336947918, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.015183891169726849, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013266477733850479, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.012475349940359592, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.012283280491828918, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.007609284482896328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.006457918789237738, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.006370438262820244, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.005934867542237043, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.0058153304271399975, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.004044320899993181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.004043560475111008, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.0037991374265402555, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.002712019020691514, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013266477733850479, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.013266477733850479, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.self_attn.v_proj", + "numel": 8388608, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17724609375, + "total_bits": 18264064.0, + "err": 0.17610615491867065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 19935232.0, + "err": 0.16558365523815155, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 22032384.0, + "err": 0.16207614541053772, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73193359375, + "total_bits": 22917120.0, + "err": 0.14696909487247467, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 27078656.0, + "err": 0.08157966285943985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 31338496.0, + "err": 0.07770949602127075, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 25431040.0, + "err": 0.09125180542469025, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 26226688.0, + "err": 0.08413857221603394, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17724609375, + "total_bits": 26652672.0, + "err": 0.08271855115890503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52880859375, + "total_bits": 29601792.0, + "err": 0.07367872446775436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664794921875, + "total_bits": 30742528.0, + "err": 0.07009562849998474, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 33819648.0, + "err": 0.046221774071455, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 34615296.0, + "err": 0.0401134267449379, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 35467264.0, + "err": 0.038929447531700134, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 36319232.0, + "err": 0.038678128272295, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 42208256.0, + "err": 0.02305523492395878, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 43855872.0, + "err": 0.01985846646130085, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32958984375, + "total_bits": 44707840.0, + "err": 0.019770100712776184, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52880859375, + "total_bits": 46379008.0, + "err": 0.018241409212350845, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73193359375, + "total_bits": 48082944.0, + "err": 0.018070288002490997, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011911969631910324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 51392512.0, + "err": 0.011771723628044128, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2503662109375, + "total_bits": 52431872.0, + "err": 0.011469975113868713, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 68169728.0, + "err": 0.00746492762118578, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011911969631910324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 50596864.0, + "err": 0.011911969631910324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.self_attn.o_proj", + "numel": 67108864, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17596435546875, + "total_bits": 146026496.0, + "err": 0.08843515813350677, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 159395840.0, + "err": 0.05756351351737976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 176173056.0, + "err": 0.04536256939172745, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73065185546875, + "total_bits": 183250944.0, + "err": 0.039208319038152695, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22674560546875, + "total_bits": 216543232.0, + "err": 0.03508799150586128, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73455810546875, + "total_bits": 250621952.0, + "err": 0.023115692660212517, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 203426816.0, + "err": 0.049025386571884155, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 209727488.0, + "err": 0.04410833865404129, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17596435546875, + "total_bits": 213135360.0, + "err": 0.0410255491733551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52752685546875, + "total_bits": 236728320.0, + "err": 0.02480291575193405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664154052734375, + "total_bits": 245897216.0, + "err": 0.024337172508239746, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 270535680.0, + "err": 0.025231588631868362, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 276836352.0, + "err": 0.021846525371074677, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22674560546875, + "total_bits": 283652096.0, + "err": 0.017930081114172935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32830810546875, + "total_bits": 290467840.0, + "err": 0.016898924484848976, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013099100440740585, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22674560546875, + "total_bits": 350760960.0, + "err": 0.011066251434385777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32830810546875, + "total_bits": 357576704.0, + "err": 0.010889731347560883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52752685546875, + "total_bits": 370946048.0, + "err": 0.009578914381563663, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73065185546875, + "total_bits": 384577536.0, + "err": 0.008998030796647072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 404753408.0, + "err": 0.007942000404000282, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 411054080.0, + "err": 0.009211952798068523, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2500457763671875, + "total_bits": 419433472.0, + "err": 0.006775082554668188, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 545271808.0, + "err": 0.007915711961686611, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013099100440740585, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 337644544.0, + "err": 0.013099100440740585, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.mlp.gate_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.08974676579236984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.08463912457227707, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.0830078050494194, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.07556763291358948, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.04169733077287674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.03988073766231537, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.04643576592206955, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.04285448417067528, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.04224231839179993, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.03783296421170235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.036112237721681595, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.02363484725356102, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.020514678210020065, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.019997412338852882, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.01987362839281559, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.011819254606962204, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.010426231659948826, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.010389339178800583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.009663720615208149, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.009587581269443035, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.006307649426162243, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.006509371101856232, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.006125959102064371, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.0045823403634130955, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.011819254606962204, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.011819254606962204, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.mlp.up_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1758335658482144, + "total_bits": 511062016.0, + "err": 0.09777559340000153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3750523158482144, + "total_bits": 557854720.0, + "err": 0.0921601802110672, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6250523158482144, + "total_bits": 616574976.0, + "err": 0.09032313525676727, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7305210658482144, + "total_bits": 641347584.0, + "err": 0.0821935385465622, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266148158482144, + "total_bits": 757870592.0, + "err": 0.04552898928523064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344273158482144, + "total_bits": 877146112.0, + "err": 0.04351869970560074, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312630789620534, + "total_bits": 711986176.0, + "err": 0.050909895449876785, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1250523158482144, + "total_bits": 734015488.0, + "err": 0.04683499038219452, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1758335658482144, + "total_bits": 745943040.0, + "err": 0.0461241751909256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5273960658482144, + "total_bits": 828518400.0, + "err": 0.04126714542508125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.664088657924107, + "total_bits": 860624896.0, + "err": 0.03947453573346138, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031263078962054, + "total_bits": 946867200.0, + "err": 0.026064053177833557, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125052315848214, + "total_bits": 968896512.0, + "err": 0.022527262568473816, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226614815848214, + "total_bits": 992751616.0, + "err": 0.021944859996438026, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328177315848214, + "total_bits": 1016606720.0, + "err": 0.0218066219240427, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01309415977448225, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226614815848214, + "total_bits": 1227632640.0, + "err": 0.011646641418337822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.328177315848214, + "total_bits": 1251487744.0, + "err": 0.011604756116867065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527396065848214, + "total_bits": 1298280448.0, + "err": 0.01083017885684967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.730521065848214, + "total_bits": 1345990656.0, + "err": 0.010747149586677551, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031263078962054, + "total_bits": 1416629248.0, + "err": 0.007207491900771856, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125052315848214, + "total_bits": 1438658560.0, + "err": 0.007547603454440832, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.250013078962054, + "total_bits": 1468009472.0, + "err": 0.007008664775639772, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125052315848214, + "total_bits": 1908420607.9999998, + "err": 0.005618894938379526, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01309415977448225, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b_128g s4", + "bpw": 5.031263078962054, + "total_bits": 1181748224.0, + "err": 0.01309415977448225, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.79.mlp.down_proj", + "numel": 234881024, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1754063197544644, + "total_bits": 510961664.0, + "err": 0.020692965015769005, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37518310546875, + "total_bits": 557885440.0, + "err": 0.017780432477593422, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62518310546875, + "total_bits": 616605696.0, + "err": 0.015892034396529198, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.014178495854139328, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2256295340401784, + "total_bits": 757639168.0, + "err": 0.00946989469230175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7278616768973216, + "total_bits": 875603968.0, + "err": 0.007970933802425861, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0312957763671875, + "total_bits": 711993856.0, + "err": 0.01287834718823433, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12518310546875, + "total_bits": 734046208.0, + "err": 0.011634038761258125, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1754063197544644, + "total_bits": 745842688.0, + "err": 0.009955829940736294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525852748325893, + "total_bits": 828155904.0, + "err": 0.008221220225095749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6630379813058034, + "total_bits": 860378112.0, + "err": 0.00790793914347887, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0312957763671875, + "total_bits": 946874880.0, + "err": 0.0067571913823485374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12518310546875, + "total_bits": 968927232.0, + "err": 0.006039044354110956, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225629534040179, + "total_bits": 992520192.0, + "err": 0.005058447364717722, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326075962611607, + "total_bits": 1016113151.9999999, + "err": 0.004803454037755728, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0312957763671875, + "total_bits": 1181755904.0, + "err": 0.0036845016293227673, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225629534040179, + "total_bits": 1227401216.0, + "err": 0.0033769295550882816, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.326075962611607, + "total_bits": 1250994176.0, + "err": 0.0032818932086229324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525852748325893, + "total_bits": 1297917952.0, + "err": 0.0032071522437036037, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72674560546875, + "total_bits": 1345103872.0, + "err": 0.0030880607664585114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0312957763671875, + "total_bits": 1416636928.0, + "err": 0.002416581381112337, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12518310546875, + "total_bits": 1438689280.0, + "err": 0.0029494347982108593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.236652919224331, + "total_bits": 1464871424.0, + "err": 0.002175387693569064, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12518310546875, + "total_bits": 1908451328.0, + "err": 0.0026699858717620373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.014178495854139328, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72674560546875, + "total_bits": 640460800.0, + "err": 0.014178495854139328, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + } + ], + "last_module_idx": 162, + "base_perplexity": 2.249547569509945 +} \ No newline at end of file