{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.021142661571502686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.020939838141202927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.011009960435330868, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.010954434052109718, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010889262892305851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.007545556873083115, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03587133437395096, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02072034403681755, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.010905690491199493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01086618285626173, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01103656180202961, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012844415381550789, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.010848687961697578, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.008263043127954006, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0075215501710772514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0082784965634346, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007515629753470421, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007302894722670317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007514655590057373, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007301408797502518, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0074328589253127575, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007514357101172209, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0071812286041677, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00730051239952445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.011009960435330868, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.011009960435330868, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02278142049908638, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.022498682141304016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01045851781964302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.010385480709373951, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010305531322956085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.005549488589167595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03020278923213482, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.022238967940211296, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.010338634252548218, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.010264958254992962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.010355242528021336, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.011595640331506729, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.010243366472423077, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.00666029192507267, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.005507700610905886, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006500467658042908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005493086762726307, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.005037783179432154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00549065787345171, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005034215282648802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005113386549055576, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005489745177328587, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0045533436350524426, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005032145418226719, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01045851781964302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01045851781964302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.059329308569431305, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.04993274807929993, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.039446137845516205, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.033451490104198456, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.026580795645713806, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.01884789764881134, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05039023235440254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.038410067558288574, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.028147036209702492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02214955724775791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.023377206176519394, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02732221595942974, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018885686993598938, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013176311738789082, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.011411351151764393, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013838585466146469, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007352108135819435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0065230438485741615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.006572898011654615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0055047618225216866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007055348716676235, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006035428959876299, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0039216019213199615, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004274086561053991, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013176311738789082, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013176311738789082, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0234044399112463, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.014719506725668907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.008571576327085495, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.009822294116020203, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.009358822368085384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.004201150964945555, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.015570491552352905, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.013835351914167404, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.010891251266002655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.00706202257424593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0074327075853943825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.007872577756643295, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.006875296588987112, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005041754338890314, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.004502446856349707, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004157891497015953, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0034351281356066465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0032828859984874725, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0031540975905954838, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0029055410996079445, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0026170569472014904, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0031346732284873724, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0021506352350115776, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0027795457281172276, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.014719506725668907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.014719506725668907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.054336003959178925, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.05128583312034607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.05029866471886635, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.04614023491740227, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.024213315919041634, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.023304058238863945, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.026925748214125633, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.02492045983672142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.024497732520103455, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.022351989522576332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.02152225747704506, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.013653308153152466, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.012007252313196659, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.011716498993337154, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.011646251194179058, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0068907649256289005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0064295632764697075, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.006406490225344896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.006101508159190416, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.006058343220502138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0038880696520209312, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.004431148059666157, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.003790528979152441, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.003550601191818714, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.013653308153152466, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.013653308153152466, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.07981983572244644, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.07543476670980453, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.07408834248781204, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.06802623718976974, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.03559218719601631, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.034265898168087006, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.039480071514844894, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.03649414703249931, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.035996634513139725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.03284517303109169, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.031591422855854034, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.019895363599061966, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.017315562814474106, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0169284138828516, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.016830487176775932, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.009932068176567554, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.008820096962153912, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.00878819264471531, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.008287526667118073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.008235279470682144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.005299142561852932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.005490544252097607, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.005158089101314545, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0038627199828624725, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.009932068176567554, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.009932068176567554, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.059150565415620804, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.051768913865089417, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.03075847402215004, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.02872263640165329, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.025077061727643013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.01575944945216179, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.03679404780268669, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.04704505205154419, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.02635985240340233, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.02275823801755905, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.02259666845202446, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.01647006906569004, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.021438099443912506, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.01646599918603897, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.012687984853982925, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.010903947055339813, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.011160343885421753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.009305139072239399, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.010907551273703575, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.008943879045546055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.006447474937886, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01077430322766304, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.004275696352124214, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.00863436609506607, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.012687984853982925, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.012687984853982925, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013749438337981701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.011027819477021694, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.008474353700876236, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.007440617308020592, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0058911764062941074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.003966988530009985, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.010745959356427193, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.008591282181441784, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.006390497088432312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.004902511835098267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.005219644866883755, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.00566359655931592, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.00425231596454978, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.002959925215691328, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.002534608356654644, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0029170254711061716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0016896759625524282, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.001494365744292736, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0015255478210747242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.001269925618544221, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0015746194403618574, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.001425333903171122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0009947959333658218, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0010336959967389703, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013749438337981701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013749438337981701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.012928780168294907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.010153806768357754, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.007883635349571705, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.006891157943755388, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.005389999132603407, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0037327734753489494, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.008812309242784977, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.00775895407423377, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.005921275354921818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.004369613714516163, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.004423936828970909, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.004473527893424034, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.003717705374583602, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.002667284570634365, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.002358691766858101, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0022548765409737825, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0015124917263165116, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0013800860615447164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.001338761649094522, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0011486185248941183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0012275846675038338, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0012362347915768623, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0008585245232097805, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0009091442334465683, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.012928780168294907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.012928780168294907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07471964508295059, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06347496807575226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05861077085137367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05053030699491501, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03332144394516945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.02860242687165737, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0412747897207737, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03788434714078903, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.034915562719106674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.027154332026839256, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02540450356900692, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02107536606490612, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01817837730050087, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.016087014228105545, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015562934800982475, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01057194173336029, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.008510490879416466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.008313678205013275, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007328389212489128, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.006978808436542749, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005587832536548376, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0057409075088799, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004850481636822224, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004023311659693718, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015562934800982475, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015562934800982475, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14653237164020538, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08701067417860031, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05459456890821457, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06397979706525803, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06291370838880539, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03385240212082863, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08892786502838135, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07943753153085709, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06962589174509048, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04002094641327858, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0426422618329525, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04588712006807327, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.039139524102211, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03183668479323387, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029894452542066574, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.023469066247344017, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018901024013757706, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01831195130944252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015388867817819118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014157118275761604, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013605027459561825, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015275383368134499, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011392737738788128, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012627528980374336, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015388867817819118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015275383368134499, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.10376954823732376, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.0990917757153511, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.09773150831460953, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.09020891785621643, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.045774854719638824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.04453372582793236, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.050392232835292816, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.04654310643672943, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0461466945707798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.04282640665769577, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.04157702252268791, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.025477634742856026, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.022184189409017563, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.021861307322978973, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.02178928256034851, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.012791640125215054, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.011707495898008347, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.011687085963785648, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.011173180304467678, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.011138941161334515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.007105211727321148, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0076963454484939575, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.00700906477868557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.005912180058658123, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.012791640125215054, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.012791640125215054, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1280508041381836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.12263572216033936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.12108975648880005, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.11217786371707916, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.056674204766750336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.055194027721881866, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.062250785529613495, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.05753578245639801, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.05709199607372284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05313339829444885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.05153237283229828, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.031206397339701653, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.02709297090768814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.02672751620411873, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.026642700657248497, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.015521103516221046, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.013592596165835857, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.013566441833972931, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.012907606549561024, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01285548321902752, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.008081435225903988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.007960855960845947, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.007951149716973305, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00507302675396204, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.015521103516221046, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.015521103516221046, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.1085464134812355, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.09727673977613449, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.08524850010871887, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.07879536598920822, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.053090404719114304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.037440232932567596, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.0732206180691719, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.0664939284324646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.054546959698200226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.04879599064588547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.046564456075429916, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.04038897156715393, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.03928178548812866, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.024011977016925812, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.021035490557551384, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.019052736461162567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.013681295327842236, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.012181870639324188, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01294846460223198, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.01127406395971775, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.011677141301333904, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.011292295530438423, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.00914674811065197, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.008336943574249744, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.013681295327842236, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.013681295327842236, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.030219439417123795, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.027836473658680916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.02673930674791336, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.02399485744535923, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.013620596379041672, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.012649565003812313, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.016490954905748367, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.014729301445186138, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.01388130895793438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.012124264612793922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.011592209339141846, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.008418428711593151, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.007063637021929026, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.00653997203335166, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.006407102104276419, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004255062434822321, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.003434294369071722, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0033747253473848104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00314412172883749, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.003062761388719082, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0022831005044281483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.002237070119008422, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0020401952788233757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0015670977300032973, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.014729301445186138, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.014729301445186138, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02411750890314579, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.02216612920165062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.02128482796251774, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.01906261406838894, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010826833546161652, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.010058638639748096, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.012864258140325546, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.011701677925884724, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.011044775135815144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.009606740437448025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.009130085818469524, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0064873844385147095, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0055716391652822495, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005184563342481852, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.005091956816613674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0032568525057286024, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.002708474639803171, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.00266812345944345, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.002470653969794512, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0024123117327690125, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00173541612457484, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0017404068494215608, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0015924146864563227, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0012031348887830973, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.012864258140325546, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.012864258140325546, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11153673380613327, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10420429706573486, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10186365991830826, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09241075068712234, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051056962460279465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048535965383052826, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.057128049433231354, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05276504158973694, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.051811691373586655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04600463807582855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043769802898168564, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02891877479851246, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025092503055930138, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024315033107995987, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02413042075932026, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424688182771206, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012387150898575783, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012327728793025017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011360462754964828, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011242873966693878, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007439783774316311, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007323249243199825, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007145291194319725, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045889294706285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424688182771206, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424688182771206, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1469869166612625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11692947149276733, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10557082295417786, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08503539115190506, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06534410268068314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05399661138653755, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08193392306566238, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07426045835018158, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06950372457504272, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0488712415099144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04478650167584419, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.042453523725271225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.036460138857364655, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03254874795675278, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03155902028083801, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02184949815273285, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018673639744520187, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018393702805042267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01590123400092125, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015294034965336323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012843498960137367, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01410398818552494, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0117145711556077, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011614379473030567, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015294034965336323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015294034965336323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.13921448588371277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.13293041288852692, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1311313360929489, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12145477533340454, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.06316579878330231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06132059544324875, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06959617137908936, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.06416822224855423, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.06370473653078079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05886060371994972, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.05717746540904045, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.035297147929668427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.030548488721251488, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.030108574777841568, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.030009159818291664, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.017684422433376312, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.015704788267612457, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.015677986666560173, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.014887051656842232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.014824679121375084, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.009600327350199223, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0097741037607193, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.009468022733926773, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.006958102807402611, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.014887051656842232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.014887051656842232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17181338369846344, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1643010824918747, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16215869784355164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15039078891277313, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0781591460108757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07593553513288498, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08610156178474426, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07937924563884735, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07881759852170944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07285385578870773, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07068659365177155, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043394654989242554, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03761911764740944, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037091709673404694, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036968886852264404, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021667156368494034, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018901098519563675, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018862897530198097, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0178450345993042, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017766930162906647, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011386290192604065, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011092258617281914, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01121925562620163, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007090485189110041, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011386290192604065, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011386290192604065, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.16010473668575287, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.1438024938106537, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.13723637163639069, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.12450767308473587, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.07177986949682236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.06556273996829987, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.08500610291957855, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.07801337540149689, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.07408987730741501, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.06313980370759964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.060795821249485016, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.04317328333854675, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.03726717829704285, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.034477971494197845, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0337960422039032, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.02170597016811371, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01817869208753109, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.01793881505727768, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.016521917656064034, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.016091343015432358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.011852017603814602, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.011926124803721905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.010962003841996193, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.008499711751937866, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.011852017603814602, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.011852017603814602, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.04287330061197281, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.03981925919651985, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0383320115506649, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.03460371866822243, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.01958618499338627, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.01827283389866352, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.02363884449005127, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02108159475028515, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.019915200769901276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.017552629113197327, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.016858121380209923, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012127168476581573, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.010134052485227585, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.009408203884959221, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.009223727509379387, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006133665330708027, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00492473877966404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.004839757457375526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.004529518075287342, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.004415153991430998, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0032868245616555214, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.003183376509696245, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0029463833197951317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002207877580076456, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012127168476581573, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012127168476581573, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.03450768068432808, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.032028231769800186, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.030800793319940567, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.02779122069478035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.015734216198325157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.01470672432333231, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.01854804717004299, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.01695043221116066, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.016005443409085274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01408619899302721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.013434773311018944, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.009381916373968124, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.008094241842627525, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.007559302728623152, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.00743148336187005, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004703462589532137, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00397183233872056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.003915846347808838, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0036527730990201235, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0035728365182876587, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.002514715539291501, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.002573698293417692, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.002330936724320054, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0018197811441496015, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01408619899302721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01408619899302721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1552315354347229, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14559844136238098, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1424374133348465, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12947170436382294, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07165532559156418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0682746022939682, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08066421002149582, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07392265647649765, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07265772670507431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06468116492033005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06173944100737572, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04085516184568405, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03519866615533829, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03418239951133728, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03392660617828369, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020399754866957664, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017411815002560616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01733240857720375, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01598953828215599, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015837831422686577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010579499416053295, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01028620358556509, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010181792080402374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006473226938396692, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010579499416053295, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010579499416053295, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13675054907798767, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11429456621408463, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10379485040903091, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07903659343719482, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06227891147136688, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052587416023015976, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08352449536323547, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0717543438076973, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0646350160241127, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046751074492931366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.042826104909181595, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04287036135792732, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03598091006278992, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.031806837767362595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0307020116597414, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.022891046479344368, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019303711131215096, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018889302387833595, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016890916973352432, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016262220218777657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014369234442710876, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015470285899937153, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012938336469233036, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013345412909984589, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014369234442710876, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014369234442710876, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16956490278244019, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1613760143518448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1588488221168518, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14631812274456024, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07872775197029114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07607310265302658, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08681810647249222, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08020635694265366, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07951660454273224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0726415291428566, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0700804591178894, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.044426094740629196, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038682129234075546, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0380147248506546, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.037871286273002625, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02228422276675701, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02038579434156418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020344745367765427, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019243501126766205, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019145455211400986, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012344086542725563, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013437374494969845, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012143445201218128, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010313124395906925, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012344086542725563, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012344086542725563, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.202090784907341, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19235728681087494, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18950560688972473, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17459286749362946, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09355492144823074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09045106172561646, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10316252708435059, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09531811624765396, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09449402242898941, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08630596846342087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08319167047739029, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.052422840148210526, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04544258862733841, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04470690339803696, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04451723396778107, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02620484121143818, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023102842271327972, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023061349987983704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021668659523129463, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021559491753578186, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013948247767984867, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014074762351810932, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013707175850868225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00963863916695118, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013948247767984867, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013948247767984867, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.03318595141172409, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.03023177944123745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.018315667286515236, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.016054820269346237, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.010726246982812881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.009506909176707268, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.028117161244153976, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.024798493832349777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.01121361181139946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.008851379156112671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.008388693444430828, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.006937493570148945, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.005990931764245033, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0053130220621824265, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.005531100556254387, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0035846130922436714, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0035770810209214687, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0029205328319221735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0032925563864409924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.002542572794482112, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0027891912031918764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.0029304588679224253, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.001847843755967915, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.0017468706937506795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.01121361181139946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.01121361181139946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.05790776014328003, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.053388725966215134, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.051542192697525024, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.04635641351342201, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.026406392455101013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.024685829877853394, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.030496761202812195, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02809026464819908, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.026924187317490578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02346361055970192, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.022306423634290695, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015432258136570454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.013377521187067032, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.012635268270969391, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012453760020434856, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.007710774429142475, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.006534556392580271, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006464008707553148, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00595423998311162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005839291960000992, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004040078725665808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004076373763382435, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0037904763594269753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0027333814650774, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015432258136570454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015432258136570454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.050263457000255585, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.04631085693836212, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.044635478407144547, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.04015309736132622, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02290390431880951, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.021370185539126396, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.026700017973780632, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02446962706744671, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.023360298946499825, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02034299075603485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.019406389445066452, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013506499119102955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.011648801155388355, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.010960262268781662, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.010790841653943062, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006751623935997486, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005662614945322275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.005592792760580778, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00515505438670516, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005048341117799282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.003533918410539627, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0035306415520608425, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.003302720608189702, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0023482334800064564, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013506499119102955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013506499119102955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.16403502225875854, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15330363810062408, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14975735545158386, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13582953810691833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0759282335639, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07212687283754349, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08507917076349258, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07849186658859253, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07709431648254395, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06821555644273758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0648195818066597, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04308934509754181, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.037389133125543594, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03621766343712807, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03593987971544266, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.021505847573280334, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018444392830133438, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018348317593336105, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01686505600810051, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016687888652086258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011092493310570717, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01088648196309805, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010661118663847446, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006799729540944099, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011092493310570717, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011092493310570717, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1771346926689148, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15617875754833221, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1475946456193924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11875736713409424, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08258672803640366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07420355081558228, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09900571405887604, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08988474309444427, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08510972559452057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06614314764738083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05821774899959564, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05109618604183197, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.043954845517873764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04063599556684494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03983663022518158, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02598416991531849, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022447651252150536, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022160159423947334, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01943969912827015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01892738603055477, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014857269823551178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015901004895567894, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013859588652849197, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012560012750327587, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014857269823551178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014857269823551178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17145487666130066, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1625717282295227, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1599205732345581, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14676178991794586, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0796261727809906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07667405903339386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08776376396417618, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08122159540653229, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08051083981990814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07294349372386932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06989219784736633, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04453711584210396, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038728952407836914, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038008272647857666, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03783830627799034, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022225894033908844, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01954836957156658, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01950404979288578, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018210984766483307, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018102670088410378, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011670373380184174, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011761248111724854, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011430987156927586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007843973115086555, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011670373380184174, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011670373380184174, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21010041236877441, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19934719800949097, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19611437618732452, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18014535307884216, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0975685641169548, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0939563512802124, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.107455313205719, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0994720384478569, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09862612932920456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08940544724464417, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08564507961273193, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05442718416452408, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0473305843770504, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04646702855825424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046256035566329956, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027119485661387444, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023609401658177376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023555558174848557, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021937919780611992, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021806642413139343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014005081728100777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013744523748755455, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013714604079723358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00855645164847374, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014005081728100777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014005081728100777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2028440237045288, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.18134987354278564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1732012778520584, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1537920981645584, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09229366481304169, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08420698344707489, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1077827662229538, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.09918859601020813, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.09509645402431488, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.07938877493143082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07494426518678665, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05483534187078476, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.047414328902959824, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.044313739985227585, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04356233775615692, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.027480533346533775, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.023189179599285126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02292986772954464, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0206769909709692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.020175041630864143, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.014813981018960476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.014898160472512245, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.013839036226272583, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.010371360927820206, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.014813981018960476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.014813981018960476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06620289385318756, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.061059948056936264, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05883551016449928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05284591019153595, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.030277805402874947, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.028226830065250397, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0350399911403656, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.032366227358579636, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.030871042981743813, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02685805968940258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02545277215540409, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.017753111198544502, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015451068989932537, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01451953686773777, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.014294641092419624, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.00887822825461626, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007566077634692192, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007474391255527735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.006892933510243893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.006751342210918665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004685243591666222, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0048079900443553925, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0043762014247477055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003311406821012497, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015451068989932537, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015451068989932537, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.05651655048131943, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.05206291750073433, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05003992095589638, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.04498325660824776, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02584335021674633, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.024051405489444733, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03034755028784275, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.027814239263534546, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.026363808661699295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.022935330867767334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.021883685141801834, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015362922102212906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.013260964304208755, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.012393392622470856, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012183674611151218, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.007684895768761635, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.006435086019337177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006343618966639042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0058612702414393425, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005725291091948748, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0040414175018668175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0040724799036979675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.003753840457648039, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0027557567227631807, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015362922102212906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015362922102212906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17554409801959991, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16390550136566162, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1600421965122223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1448434740304947, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08141086995601654, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07726603746414185, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09154224395751953, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08434540033340454, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08268812298774719, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07299734652042389, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06922921538352966, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04641064256429672, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04019628092646599, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.038885194808244705, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03856542333960533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02314716950058937, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019796811044216156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01969541236758232, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01806977577507496, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017866840586066246, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0119528379291296, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.011710569262504578, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011461074464023113, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007333521265536547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0119528379291296, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0119528379291296, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18164081871509552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15734359622001648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14892999827861786, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12213047593832016, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08413688093423843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07504945993423462, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09964723885059357, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09043912589550018, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08698073029518127, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06738607585430145, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05960662662982941, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05135239288210869, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04415710270404816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04124924913048744, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04058201238512993, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026153836399316788, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022805742919445038, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02260526828467846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019788939505815506, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019359499216079712, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014974523335695267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016107887029647827, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014089441858232021, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012821095064282417, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014974523335695267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014974523335695267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16524630784988403, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15630164742469788, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15361729264259338, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14071837067604065, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07700292766094208, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07398233562707901, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08514704555273056, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07875529676675797, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07793337106704712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07027451694011688, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06728172302246094, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04330980405211449, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.037651803344488144, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.036874257028102875, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036694858223199844, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021645989269018173, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01915283128619194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019106131047010422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017814934253692627, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017708783969283104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011518552899360657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011806806549429893, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011277337558567524, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008202334865927696, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011518552899360657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011518552899360657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21740595996379852, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20572452247142792, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20231053233146667, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18524932861328125, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10125711560249329, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0973210409283638, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11186102777719498, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10342365503311157, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10244480520486832, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09241407364606857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0883590430021286, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.056711968034505844, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04928293824195862, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0482921302318573, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04805964231491089, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028302941471338272, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024546485394239426, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024486016482114792, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0227296594530344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022584784775972366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014682176522910595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014330334030091763, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014347490854561329, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008936863392591476, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014682176522910595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014682176522910595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.20839691162109375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.18605414032936096, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.17742136120796204, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15729376673698425, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09493361413478851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08639904856681824, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1114407554268837, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10248725861310959, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.09789835661649704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08157806843519211, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07691333442926407, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.056831590831279755, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0490790531039238, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04571069777011871, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0448884591460228, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.028554623946547508, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024085093289613724, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02379979006946087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.021510589867830276, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.020962461829185486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01559504121541977, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.015724387019872665, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014539326541125774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011182633228600025, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01559504121541977, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01559504121541977, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08217410743236542, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07655660063028336, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0743413046002388, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06724544614553452, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03796424716711044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.035806991159915924, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04322861507534981, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03993489220738411, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.038583576679229736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03403520584106445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.032312020659446716, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.021926341578364372, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019091229885816574, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018208859488368034, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01800130121409893, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010969571769237518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0094757666811347, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009393097832798958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008695902302861214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008565579541027546, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005786660127341747, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005945772398263216, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005493988282978535, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004097263794392347, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010969571769237518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010969571769237518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06709007918834686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06256610155105591, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0606621615588665, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05489445850253105, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.031013889238238335, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.02919299528002739, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03577041253447533, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03281622752547264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.031507182866334915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02783474512398243, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.026579443365335464, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01815369911491871, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015666618943214417, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014862860552966595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.014669420197606087, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009080236777663231, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007686643395572901, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007607220206409693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007050391752272844, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.006927970796823502, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004758815746754408, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004774894565343857, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004488012287765741, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0031977221369743347, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014862860552966595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014862860552966595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19136665761470795, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1793876588344574, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1755158007144928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1594138741493225, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08909548819065094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08479941636323929, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0996512845158577, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09192963689565659, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09032373130321503, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08023399859666824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07620598375797272, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05052808299660683, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.043790336698293686, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04251207411289215, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04220359027385712, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025184668600559235, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02160150557756424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021503105759620667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01978042908012867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019579516723752022, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012950983829796314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01267844159156084, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012471376918256283, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00784915778785944, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012950983829796314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012950983829796314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1787324994802475, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1549779623746872, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14552347362041473, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12151403725147247, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08293335884809494, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07313454151153564, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10008793324232101, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09103737771511078, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08552979677915573, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06490203738212585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06108904257416725, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05159416049718857, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04431791976094246, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04064910486340523, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.039775025099515915, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026007220149040222, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022343771532177925, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022006111219525337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019049042835831642, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018465086817741394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014626748859882355, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015782130882143974, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013498677872121334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012330802157521248, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014626748859882355, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014626748859882355, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1620061993598938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15301769971847534, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15027841925621033, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13739782571792603, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07564721256494522, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07256622612476349, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08360707014799118, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07743897289037704, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07659178227186203, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06883829087018967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0657208189368248, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04247787967324257, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03697681054472923, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03616180270910263, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03598254919052124, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021203387528657913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018588818609714508, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018534570932388306, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01720951497554779, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017092274501919746, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01109648123383522, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011181860230863094, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01083169411867857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0074093337170779705, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01109648123383522, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01109648123383522, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22024545073509216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20812250673770905, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20447036623954773, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1870582103729248, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10285068303346634, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.098679319024086, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.113711416721344, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10519963502883911, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10412998497486115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09359598159790039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0894194170832634, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0577080212533474, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05014042928814888, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04906545951962471, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04882029443979263, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02878820151090622, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024962596595287323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02489352598786354, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023060429841279984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022899432107806206, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01494569331407547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01460457406938076, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014584317803382874, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00912573840469122, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01494569331407547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01494569331407547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21313993632793427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19079597294330597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1820962131023407, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16168059408664703, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09728305786848068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08872295171022415, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11424291878938675, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10499918460845947, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10028673708438873, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08379308879375458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07913151383399963, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05817977711558342, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05028887093067169, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04682933911681175, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.045985147356987, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.029205363243818283, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024667706340551376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.024371985346078873, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02206612005829811, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021508604288101196, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01586003415286541, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.0160884577780962, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014763521030545235, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011435589753091335, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014763521030545235, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014763521030545235, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08073227852582932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07530633360147476, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07312541455030441, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06611112505197525, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.037293750792741776, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03522225841879845, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04254970699548721, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03925569728016853, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03789189085364342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.033439721912145615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03176737204194069, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.021568449214100838, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018737776204943657, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.017871834337711334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.017658378928899765, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010783442296087742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009228279814124107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009143069386482239, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008451094850897789, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008318371139466763, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005652627907693386, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005698298569768667, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005355119239538908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003805025015026331, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010783442296087742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010783442296087742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06719447672367096, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0627356767654419, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06076928973197937, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05497640371322632, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03108184039592743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.029260648414492607, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.035777464509010315, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.032977137714624405, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.031582269817590714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.027882881462574005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.026566307991743088, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.018134618178009987, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015731537714600563, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014882729388773441, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.014681820757687092, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009062389843165874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007681982591748238, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007596186362206936, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007040384225547314, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.006914486642926931, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004741458222270012, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00475799897685647, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004459603223949671, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003155155573040247, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014882729388773441, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014882729388773441, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18882369995117188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17701928317546844, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17314141988754272, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15702660381793976, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08790645003318787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08368292450904846, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09865841269493103, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09080586582422256, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.089169442653656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07911421358585358, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07518874108791351, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05008986219763756, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0432765819132328, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041975706815719604, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04166487604379654, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.024972105398774147, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02133096382021904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02123222127556801, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019518213346600533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019322926178574562, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012856317684054375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.012531374581158161, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012340538203716278, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007752280682325363, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012856317684054375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012856317684054375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2011307328939438, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16807715594768524, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15678279101848602, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1289709508419037, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09170123189687729, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07933132350444794, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.109882652759552, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10005206614732742, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09577181190252304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0709734559059143, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06501491367816925, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0566389337182045, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0489388108253479, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.045155975967645645, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044240809977054596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02878667786717415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025231454521417618, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024986444041132927, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02158963680267334, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021013658493757248, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016540413722395897, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018197976052761078, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01540962141007185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014655051752924919, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01540962141007185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01540962141007185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15140500664710999, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14296339452266693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14034968614578247, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12828919291496277, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07084289193153381, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06788413971662521, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07855360209941864, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0726199746131897, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07171504944562912, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06439696997404099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.061527349054813385, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03994014114141464, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03473003953695297, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03393281623721123, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.033741295337677, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.019981026649475098, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017545649781823158, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01749151013791561, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016253458335995674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016135483980178833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010596212930977345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01072295568883419, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010331596247851849, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0073058088310062885, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010596212930977345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010596212930977345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21790827810764313, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20586590468883514, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2021743506193161, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18493995070457458, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10207448899745941, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09792476892471313, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11306525021791458, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10449965298175812, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10334590077400208, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09284669160842896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0887785330414772, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.057515859603881836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.049958180636167526, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04885834828019142, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0486048199236393, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028731394559144974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02524021826684475, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02516983263194561, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023375390097498894, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02322101593017578, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015157720074057579, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015345992520451546, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01480168104171753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010388539172708988, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015157720074057579, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015157720074057579, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.20222841203212738, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.180251806974411, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1717403531074524, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15243856608867645, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09212897717952728, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08375764638185501, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1092754676938057, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.0996684655547142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.09505395591259003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.07923810929059982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07480589300394058, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05547351390123367, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.04792598634958267, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04458877444267273, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04376932233572006, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.028067972511053085, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02391824685037136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.023624958470463753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02149026095867157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.020952550694346428, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01570120081305504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016124224290251732, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01465616375207901, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01201304979622364, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01465616375207901, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01465616375207901, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.091714046895504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08571990579366684, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08328723162412643, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.075371652841568, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.042405202984809875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04009192809462547, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04828733578324318, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04455902427434921, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.043064530938863754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03809633105993271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.036258820444345474, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024468999356031418, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021247267723083496, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020301563665270805, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020071182399988174, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012234543450176716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010480868630111217, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010390055365860462, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009612633846700191, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009471739642322063, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006413259077817202, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0064596147276461124, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006093267817050219, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004312924575060606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012234543450176716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012234543450176716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07418617606163025, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0693117156624794, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0672277882695198, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.060879409313201904, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03433344513177872, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03236956521868706, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.039398614317178726, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03632459044456482, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0348564088344574, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03085022233426571, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02940441109240055, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01998019963502884, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.017335228621959686, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.016438277438282967, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.016230348497629166, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009985437616705894, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00848501268774271, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.008394780568778515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00778614217415452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007652296219021082, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005234950687736273, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005249131470918655, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00494465883821249, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0034842819441109896, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009985437616705894, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009985437616705894, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20047980546951294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18804821372032166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18396221101284027, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16700807213783264, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09336916357278824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08894231915473938, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10496827214956284, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09638077020645142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09469199925661087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08412241190671921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08011581748723984, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05330429971218109, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.045939087867736816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04456577077507973, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044247888028621674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02658041939139366, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022656382992863655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022549986839294434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020753299817442894, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020547328516840935, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013699725270271301, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01330533716827631, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01313673984259367, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00825249683111906, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013699725270271301, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013699725270271301, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18996559083461761, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16774404048919678, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.160415917634964, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13651616871356964, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08829710632562637, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07994595915079117, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10187266767024994, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09354391694068909, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09080905467271805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0729069709777832, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0656907707452774, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.052741888910532, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04564258083701134, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04327944666147232, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04270029067993164, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026807475835084915, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02372516132891178, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02356577478349209, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020892828702926636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02054653689265251, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015338867902755737, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01648728735744953, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01464625634253025, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013029171153903008, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015338867902755737, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015338867902755737, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15662670135498047, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14780019223690033, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14506831765174866, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13255061209201813, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0734463781118393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07036874443292618, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08145078271627426, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07534117251634598, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07436175644397736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06675571203231812, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06374640017747879, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04157351702451706, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03611774370074272, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.035273030400276184, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03507960960268974, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020783929154276848, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01840142533183098, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01834462210536003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01707989163696766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0169551819562912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0111033134162426, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011481043882668018, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010826393030583858, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008101249113678932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0111033134162426, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0111033134162426, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21923862397670746, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20709167420864105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2033410668373108, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1858355551958084, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1025485098361969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09831200540065765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11388988792896271, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10503605008125305, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10381564497947693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09323704242706299, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0890943855047226, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05779585987329483, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05012999475002289, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04900159686803818, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.048735663294792175, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02890254557132721, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02501809597015381, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024941593408584595, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02310306765139103, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02293536812067032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015156754292547703, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014781307429075241, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014782429672777653, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009426407516002655, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015156754292547703, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015156754292547703, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21480470895767212, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19161206483840942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18251588940620422, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1618397831916809, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09822896122932434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0891827642917633, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11598771810531616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10626215487718582, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10126250237226486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08422048389911652, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07938553392887115, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.059080664068460464, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05111163854598999, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.047518134117126465, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04662951081991196, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.029780607670545578, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02538882941007614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02507811412215233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022731037810444832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022156503051519394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016422703862190247, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017014101147651672, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015288923867046833, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012576676905155182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015288923867046833, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015288923867046833, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.097457155585289, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0910518541932106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08853727579116821, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08019024133682251, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04522405564785004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.042754095047712326, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05145503953099251, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047453925013542175, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.045899685472249985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040662284940481186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03871383145451546, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02618204429745674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022784996777772903, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021772466599941254, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021527128294110298, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013123322278261185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011442155577242374, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011343722231686115, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010546725243330002, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010400386527180672, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007008129730820656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00733191380277276, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00666667427867651, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005237936973571777, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013123322278261185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013123322278261185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08232472091913223, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07699882984161377, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07472875714302063, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06773263961076736, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0381934717297554, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.036064088344573975, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04371536523103714, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.040318362414836884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0387716218829155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03436722233891487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03279104828834534, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022197505459189415, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019248904660344124, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018303953111171722, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018077146261930466, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011099018156528473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009467190131545067, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009373882785439491, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008700194768607616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008557597175240517, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0058318548835814, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005873026791960001, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005518003832548857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003944140858948231, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011099018156528473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011099018156528473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19996285438537598, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18770088255405426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18356560170650482, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16673117876052856, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09324940294027328, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08880820125341415, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10489240288734436, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09630094468593597, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.094558484852314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08406826108694077, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08004121482372284, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.053274840116500854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.045926354825496674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04454878717660904, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04420491307973862, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026572715491056442, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022692011669278145, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0225819144397974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020808083936572075, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02059645764529705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013742639683187008, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013403050601482391, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013189814984798431, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008420297876000404, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013742639683187008, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013742639683187008, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2016715407371521, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1732465624809265, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1620025783777237, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13028287887573242, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09266113489866257, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0815221518278122, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11176305264234543, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10238257795572281, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09608825296163559, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07325532287359238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0643545538187027, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05788201466202736, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05015166848897934, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0458366759121418, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04477350413799286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02945820428431034, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025801243260502815, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02544596418738365, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02240097150206566, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021751251071691513, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017062779515981674, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018874000757932663, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01577812246978283, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015327316708862782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015327316708862782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015327316708862782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1634591668844223, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15428030490875244, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15136834979057312, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13832131028175354, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0766574814915657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07340628653764725, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08499667048454285, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07865890115499496, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07760971784591675, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06964904069900513, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06645341217517853, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04332125931978226, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03768070787191391, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03677830472588539, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036570217460393906, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02164665050804615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019126368686556816, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01905778795480728, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017736516892910004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01759985089302063, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01148331817239523, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011843577958643436, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011187233962118626, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008247969672083855, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01148331817239523, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01148331817239523, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22267186641693115, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21028006076812744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20643514394760132, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1886356621980667, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10424645990133286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09991898387670517, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11555458605289459, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1068103238940239, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10553941130638123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09472360461950302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09034696966409683, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.058633603155612946, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05095555633306503, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04979483410716057, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04953061044216156, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02926700934767723, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0253891721367836, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025308648124337196, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023437172174453735, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02325885184109211, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015208457596600056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014957782812416553, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014811670407652855, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009465225972235203, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015208457596600056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014957782812416553, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22569230198860168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20178551971912384, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19241617619991302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17034222185611725, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10356053709983826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0941825658082962, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12174684554338455, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11183721572160721, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10675264149904251, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0887855738401413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08375426381826401, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06239055469632149, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05395042523741722, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0502278134226799, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04932064563035965, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03147714212536812, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027049317955970764, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026722414419054985, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0242727380245924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023682793602347374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017506582662463188, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018359532579779625, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016319790855050087, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013837689533829689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013837689533829689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013837689533829689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10456246137619019, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09777526557445526, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0951140969991684, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08610859513282776, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04849035292863846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04587553068995476, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05499295890331268, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05077260732650757, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04920423775911331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04354242980480194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0413818396627903, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027923576533794403, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024276841431856155, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023249097168445587, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02300448901951313, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013965451158583164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01207034569233656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01197777409106493, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0110931983217597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010940546169877052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007359105162322521, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007525892928242683, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00701010599732399, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005156621336936951, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013965451158583164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013965451158583164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08441966772079468, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0789380595088005, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07659907639026642, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06933913379907608, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03909188136458397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.036911141127347946, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04474934563040733, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04130029305815697, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03969525173306465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03514521196484566, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03348029777407646, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02269558236002922, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019717417657375336, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01873750425875187, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018498899415135384, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01134201418608427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009684219025075436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009583861567080021, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008890202268958092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00874413549900055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005954031366854906, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006005138158798218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005636605434119701, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004013675265014172, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01134201418608427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01134201418608427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20021645724773407, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18767903745174408, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18335981667041779, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1663431078195572, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09325656294822693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08866076916456223, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10488861054182053, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0966673493385315, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09457036852836609, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08396996557712555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07972899824380875, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05318000540137291, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.046094901859760284, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04453667253255844, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04416414350271225, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026542728766798973, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022727951407432556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022595679387450218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020829251036047935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02058897726237774, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013735915534198284, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013515020720660686, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013170458376407623, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00853533111512661, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013735915534198284, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013735915534198284, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2130201756954193, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1927948147058487, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1845562756061554, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1588420271873474, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09982931613922119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0917690247297287, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11718827486038208, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1073451042175293, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10204091668128967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08509305864572525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07715042680501938, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06040044501423836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05236869677901268, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04891752451658249, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04808653146028519, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03057227097451687, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02685742825269699, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026525719091296196, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024138882756233215, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023624958470463753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017211169004440308, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018790792673826218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016173357143998146, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01477669645100832, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01477669645100832, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01477669645100832, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15612506866455078, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.147150456905365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14427974820137024, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13169646263122559, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07327581197023392, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07011618465185165, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08133818954229355, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0752510353922844, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07421476393938065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06645115464925766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0634230226278305, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.041561152786016464, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.036130521446466446, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.035241901874542236, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03503415361046791, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020798400044441223, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018467983230948448, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0184085201472044, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017130419611930847, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017006203532218933, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011179227381944656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011647327803075314, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010893377475440502, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008354898542165756, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011179227381944656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011179227381944656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21917614340782166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2067241221666336, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20289312303066254, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18520943820476532, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10254974663257599, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09818735718727112, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11386127024888992, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10510538518428802, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10381831228733063, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09301745891571045, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08869629353284836, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05766249820590019, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05015106871724129, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04898601397871971, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04871656373143196, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02883944660425186, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024991454556584358, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02491188608109951, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023035481572151184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02285849116742611, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015056464821100235, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014739925041794777, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0146636376157403, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00935562513768673, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015056464821100235, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015056464821100235, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22719088196754456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2040921300649643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1952897608280182, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17343071103096008, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10424100607633591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0954694151878357, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12128248810768127, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11166062951087952, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10723093897104263, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08984574675559998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08459922671318054, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06198975071310997, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05365573987364769, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050315406173467636, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04951252043247223, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031244339421391487, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02672392502427101, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026447037234902382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023972705006599426, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023441361263394356, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0173043180257082, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.0176501777023077, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016301218420267105, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012902220711112022, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012902220711112022, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012902220711112022, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10317052155733109, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09647532552480698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09385068714618683, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08489885926246643, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04786273464560509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04528749734163284, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05440358445048332, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05020473152399063, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04858320951461792, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04299869015812874, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04087061434984207, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027592703700065613, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02397807501256466, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022922566160559654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02267318218946457, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01379353553056717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011846345849335194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011742942035198212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010866900905966759, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010708536952733994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007235962897539139, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007311348803341389, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00687790010124445, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004898599348962307, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01379353553056717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01379353553056717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08486348390579224, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07943452149629593, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07702988386154175, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06976167112588882, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03942243009805679, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03718354552984238, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04509251192212105, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0416564904153347, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.039990879595279694, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.035425279289484024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03371651470661163, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022861801087856293, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019877519458532333, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01886560581624508, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018620280548930168, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011419462971389294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009722895920276642, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009617677889764309, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008918370120227337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008764058351516724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00597565621137619, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005985206458717585, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00565147353336215, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003942972514778376, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011419462971389294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011419462971389294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2027350813150406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19005250930786133, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18557608127593994, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16817961633205414, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0944676324725151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08977337926626205, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.106478750705719, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0980154424905777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09582360833883286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.084998220205307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08063694089651108, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05399081110954285, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04674318805336952, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.045158445835113525, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04477272182703018, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026942040771245956, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02304038405418396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0229030754417181, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02109646238386631, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02085319720208645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013944404199719429, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013704154640436172, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013355541974306107, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008658566512167454, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013944404199719429, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013944404199719429, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21146996319293976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18817578256130219, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17777882516384125, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15170468389987946, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0975104570388794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08821087330579758, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11825411021709442, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10860395431518555, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10142068564891815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08245459198951721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0762142613530159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06109611690044403, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05277968570590019, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.047736916691064835, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046486347913742065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030721111223101616, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026132728904485703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025689981877803802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023352721706032753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02258995547890663, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016973575577139854, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018456000834703445, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015401508659124374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014224136248230934, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015401508659124374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015401508659124374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15879836678504944, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14963199198246002, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14665189385414124, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1336161345243454, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07456298172473907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07129788398742676, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08294078707695007, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07667744159698486, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0755319818854332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06753024458885193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0643680989742279, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04233188554644585, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03684118017554283, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.035893648862838745, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.035669904202222824, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021189860999584198, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018853820860385895, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018785784021019936, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017477218061685562, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017345335334539413, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01139116007834673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011960258707404137, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011082664132118225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008645087480545044, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01139116007834673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01139116007834673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2181585133075714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20558732748031616, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20167388021945953, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1837933510541916, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10209137946367264, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0976664200425148, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11337652802467346, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10476523637771606, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10339032113552094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09244144707918167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08796358108520508, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05749540403485298, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04996280372142792, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04876408353447914, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04847585782408714, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028686007484793663, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024865666404366493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024779440835118294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0228851567953825, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022707190364599228, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014864407479763031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014652595855295658, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014448934234678745, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009262170642614365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014864407479763031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014864407479763031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2324669361114502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20954862236976624, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20085959136486053, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17764629423618317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10691794008016586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09810172766447067, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12448678910732269, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11445441097021103, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1099424660205841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09228745102882385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08670790493488312, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06365051865577698, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05502577871084213, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05157751590013504, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05074825882911682, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03210793808102608, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0273366030305624, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027047783136367798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02452278882265091, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023986293002963066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017754442989826202, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017969956621527672, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016695253551006317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013063684105873108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013063684105873108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013063684105873108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10988983511924744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10274941474199295, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09995619207620621, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09034251421689987, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05103498324751854, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0482792966067791, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05782800912857056, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05342589318752289, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05179275572299957, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045793429017066956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04346473887562752, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02934250235557556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025493722409009933, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024421216920018196, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024164145812392235, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014657648280262947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012558842077851295, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01245777029544115, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01149783842265606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011336615309119225, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007655104156583548, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007658498827368021, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0072897057980299, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0050259800627827644, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014657648280262947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014657648280262947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08959811925888062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08381426334381104, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08133213222026825, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07360076904296875, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04160456359386444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03928445652127266, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.047534361481666565, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04392601177096367, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04222919046878815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03739919140934944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03556361421942711, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024062521755695343, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020965255796909332, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01991993561387062, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01966690830886364, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012022741138935089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01024826243519783, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01014114823192358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009399134665727615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00924014300107956, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006280533969402313, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006289571989327669, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005947064608335495, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0041192579083144665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012022741138935089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012022741138935089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20804600417613983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19486567378044128, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1901821494102478, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17218516767024994, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09700745344161987, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09207575023174286, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10941784083843231, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10072116553783417, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09838749468326569, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08714845776557922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08270911127328873, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05550448223948479, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.048116397112607956, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04638633877038956, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04596993699669838, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027733957394957542, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02373935654759407, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023581501096487045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021733703091740608, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021473616361618042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014446237124502659, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014242156408727169, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013828086666762829, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009148433804512024, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014446237124502659, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014446237124502659, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21827208995819092, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1968671679496765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1889481246471405, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1644047349691391, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10187831521034241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0936807245016098, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11808528006076813, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10823516547679901, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1044069156050682, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08672747015953064, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07970711588859558, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06069467216730118, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05250738933682442, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04964025318622589, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04896295443177223, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030524926260113716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026894675567746162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026666829362511635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024063263088464737, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023638857528567314, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01688924990594387, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018317563459277153, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016011955216526985, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014122401364147663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014122401364147663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014122401364147663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16102415323257446, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15169018507003784, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1486644595861435, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13539601862430573, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07566724717617035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07234277576208115, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08414128422737122, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0778115764260292, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07666425406932831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0685066431760788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06535353511571884, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04298108071088791, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03740821033716202, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03645041957497597, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03622030094265938, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021524932235479355, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019185462966561317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019116120412945747, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017782429233193398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017648721113801003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011600875295698643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012219484895467758, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011291594244539738, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008885226212441921, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011600875295698643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011600875295698643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21957607567310333, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20684489607810974, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20283764600753784, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18476074934005737, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10281416028738022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0983111709356308, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11419866234064102, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10553118586540222, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1041545569896698, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09303887188434601, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0885753408074379, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05799293518066406, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05034077540040016, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04910966381430626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04882263392210007, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028948256745934486, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025015288963913918, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02492741495370865, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023004457354545593, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022821100428700447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015024464577436447, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014708382077515125, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01459454745054245, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00924922525882721, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015024464577436447, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015024464577436447, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2337893545627594, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2107868492603302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20214027166366577, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17935842275619507, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10778568685054779, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09893730282783508, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12528246641159058, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11501210182905197, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11070045083761215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0930987149477005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08753260970115662, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06416881084442139, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055389758199453354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05210355669260025, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.051301855593919754, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03246599808335304, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02775191143155098, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027481788769364357, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024948135018348694, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024435538798570633, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018131688237190247, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01838480681180954, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01712695136666298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013577812351286411, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013577812351286411, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013577812351286411, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11732746660709381, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10968134552240372, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1067919060587883, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09664180874824524, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.054509032517671585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.051646724343299866, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06176506727933884, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05698643624782562, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.055336449295282364, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04896508529782295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04652116447687149, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03135935962200165, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02727196179330349, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02614922821521759, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02588701993227005, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01567789912223816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013565224595367908, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013458590023219585, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012453758157789707, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012286990880966187, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00825477298349142, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008426121436059475, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007877957075834274, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005748092662543058, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013565224595367908, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013565224595367908, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09238270670175552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08637935668230057, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08369576930999756, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07574307173490524, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04287010058760643, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04040028154850006, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.049166202545166016, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04542592540383339, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.043492041528224945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03851982578635216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03665067255496979, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024927325546741486, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021685540676116943, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020532632246613503, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02025495283305645, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012452910654246807, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010587324388325214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010464196093380451, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009713509120047092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009537800215184689, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006525248754769564, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00654389476403594, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006163462996482849, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004323048982769251, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012452910654246807, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012452910654246807, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21713247895240784, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20342424511909485, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19867894053459167, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1800878942012787, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10134866088628769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09626772999763489, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1141698807477951, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10504263639450073, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10278903692960739, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09111838042736053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0864708423614502, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.057955071330070496, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05017274245619774, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.048462867736816406, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04805499315261841, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028962310403585434, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024792812764644623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024644536897540092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022708119824528694, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02244287170469761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015065006911754608, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014853623695671558, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01443386822938919, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009517856873571873, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015065006911754608, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015065006911754608, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21786442399024963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.192824125289917, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1847137212753296, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15779276192188263, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10152340680360794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0921952947974205, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11769231408834457, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10749158263206482, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10460374504327774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08464328199625015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07718328386545181, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06081802770495415, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0524309016764164, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.049734245985746384, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04909549653530121, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03071933053433895, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027149638161063194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026966385543346405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024013735353946686, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02360803447663784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017226161435246468, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018707897514104843, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01641552522778511, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014662347733974457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014662347733974457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014662347733974457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16005507111549377, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15070560574531555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14767789840698242, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1345244199037552, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07526747137308121, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07194554805755615, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0836828351020813, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07739096134901047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07624755054712296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06812415271997452, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06484334170818329, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.042728353291749954, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03720936179161072, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03625916317105293, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03603590279817581, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021393023431301117, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0190554391592741, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018992986530065536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01766159012913704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017524689435958862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011512446217238903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012115320190787315, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01120498776435852, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008764440193772316, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011512446217238903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011512446217238903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21983495354652405, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20707957446575165, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2031526267528534, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18503133952617645, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10294507443904877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09845445305109024, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11437168717384338, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10562584549188614, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10428935289382935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09315858781337738, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0887407660484314, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05806954205036163, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05039861798286438, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0491897277534008, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0488969162106514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029012365266680717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025064636021852493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024983234703540802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02304520271718502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022864744067192078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015087698586285114, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014741746708750725, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014677049592137337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009274113923311234, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015087698586285114, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015087698586285114, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2328951358795166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20977844297885895, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20091979205608368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17836375534534454, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10702081024646759, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09811806678771973, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12521584331989288, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11471126973628998, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11007998883724213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09245080500841141, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08711569756269455, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06377121806144714, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0550612136721611, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05162854120135307, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05078880861401558, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03202136605978012, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027314860373735428, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027018949389457703, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024496544152498245, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023944782093167305, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01743587665259838, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01789381355047226, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01633925922214985, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012926709838211536, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012926709838211536, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012926709838211536, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11956329643726349, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11191877722740173, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10906265676021576, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09878288954496384, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055612072348594666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05271562933921814, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06283247470855713, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05804668366909027, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056396618485450745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05000974237918854, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047502465546131134, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031893808394670486, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027713559567928314, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026612216606736183, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026352960616350174, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01592310704290867, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013699600473046303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013596845790743828, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012569119222462177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012403803877532482, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008313233032822609, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008358842693269253, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007934220135211945, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00551474466919899, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013699600473046303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013699600473046303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09309856593608856, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08717434108257294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0846034586429596, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07667101919651031, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04327094554901123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04086245596408844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04945650324225426, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.045733559876680374, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04390089586377144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.038933515548706055, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03704453632235527, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025034358724951744, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02180701494216919, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020710021257400513, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0204347912222147, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012505811639130116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01063506118953228, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010519234463572502, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009757641702890396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009590418078005314, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006512661464512348, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00649936031550169, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006160159595310688, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004219836555421352, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012505811639130116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012505811639130116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2197917103767395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2062726765871048, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20130757987499237, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1826842874288559, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10278373211622238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09765598922967911, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11610852926969528, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1068153828382492, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10421384125947952, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09255972504615784, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08799208700656891, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05892232060432434, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05104988440871239, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04915963113307953, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.048702653497457504, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02943081595003605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025115886703133583, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024940915405750275, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023014796897768974, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02272709831595421, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015313960611820221, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015004013665020466, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014643726870417595, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009515909478068352, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015313960611820221, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015004013665020466, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22023378312587738, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20000043511390686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19362592697143555, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16818328201770782, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10249608010053635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09518302977085114, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11689970642328262, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10702075809240341, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10468123108148575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08780093491077423, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08103397488594055, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06041662022471428, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05235864967107773, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050376445055007935, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04988838732242584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03086557425558567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027751198038458824, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027623482048511505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025043027475476265, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024745821952819824, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01790439523756504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019302161410450935, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017325423657894135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015462849289178848, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015462849289178848, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015462849289178848, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16490136086940765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15529687702655792, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15222947299480438, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13866066932678223, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07752715051174164, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0740756243467331, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08611328154802322, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07970274984836578, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07852388173341751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07016313076019287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06680941581726074, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04397917911410332, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038261447101831436, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037284206598997116, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03705749660730362, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021982869133353233, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019507670775055885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019433677196502686, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0180522333830595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0179084874689579, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011746808886528015, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012260045856237411, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011423935182392597, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008722114376723766, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011746808886528015, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011746808886528015, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22018608450889587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20745576918125153, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2035139501094818, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18545807898044586, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10318895429372787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09869951754808426, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1144658699631691, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10587696731090546, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10453344136476517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0934406965970993, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08888907730579376, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05811358243227005, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05051054060459137, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.049298934638500214, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.049004927277565, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028992297127842903, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025098133832216263, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025018062442541122, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02308046817779541, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02290288731455803, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015007861889898777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014741125516593456, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014595448970794678, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00925415474921465, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015007861889898777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015007861889898777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2331739217042923, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20862062275409698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19923825562000275, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1763341873884201, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10688390582799911, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09742972999811172, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1251458376646042, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11511756479740143, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11018414795398712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09185176342725754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08630117774009705, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06390448659658432, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055230334401130676, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051518071442842484, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.050597891211509705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03219187259674072, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027158742770552635, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02684972994029522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024246487766504288, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023644903674721718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017598172649741173, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01772647723555565, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016439350321888924, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012640233151614666, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012640233151614666, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012640233151614666, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11526329815387726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.107967309653759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10521118342876434, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09547494351863861, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05364985018968582, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05087431147694588, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06064083054661751, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05600494518876076, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0544128380715847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04827507212758064, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.045965634286403656, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030792327597737312, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026744652539491653, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025676913559436798, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025423932820558548, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015391232445836067, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013196209445595741, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013094599358737469, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012111585587263107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011952158063650131, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008037170395255089, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008027929812669754, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007681000046432018, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0052626547403633595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015391232445836067, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015391232445836067, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09271707385778427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08687126636505127, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08435720205307007, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07648380845785141, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04311025142669678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04073205590248108, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.049147509038448334, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04551149904727936, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04372774064540863, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03880561515688896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03693993389606476, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024899205192923546, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02171177603304386, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020643077790737152, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020383670926094055, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012436691671609879, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010614241473376751, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01050649769604206, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009746711701154709, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009583170525729656, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006498660426586866, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00651022931560874, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006160431541502476, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0042541129514575005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012436691671609879, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012436691671609879, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22484490275382996, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21105685830116272, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20637977123260498, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18732789158821106, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10515642911195755, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10007105022668839, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11840745806694031, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1089724525809288, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10665254294872284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09484308212995529, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09016985446214676, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060107987374067307, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.052057985216379166, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050298914313316345, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04988177493214607, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03002685308456421, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02567972242832184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025527574121952057, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023553814738988876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02328106202185154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01558724045753479, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015299682505428791, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014940041117370129, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009684796445071697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01558724045753479, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015299682505428791, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21009773015975952, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1893259584903717, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18032610416412354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15859133005142212, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09810363501310349, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0892157107591629, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11707118153572083, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10685831308364868, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1006987914443016, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08388473093509674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07826372981071472, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060154665261507034, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05158510059118271, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04759917035698891, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04662175104022026, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03014077991247177, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02533423714339733, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024953197687864304, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02257908321917057, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02198183164000511, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016214735805988312, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016924381256103516, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014909092336893082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012358146719634533, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014909092336893082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014909092336893082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16390615701675415, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.154362291097641, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15129482746124268, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13783089816570282, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07710260152816772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07371299713850021, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0856536403298378, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07920189946889877, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07811220735311508, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06977846473455429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06645028293132782, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043734047561883926, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03807413578033447, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03710928186774254, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036885153502225876, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0218916367739439, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01947370357811451, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01940802112221718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0180371031165123, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017899157479405403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011756232008337975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012317119166254997, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011447152122855186, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008858060464262962, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011756232008337975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011756232008337975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22072160243988037, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20780396461486816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20381556451320648, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18574929237365723, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10341985523700714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09890161454677582, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11475417762994766, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10606563091278076, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1047423779964447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09359163045883179, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08902592211961746, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05823222175240517, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.050610847771167755, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.049408022314310074, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0491250604391098, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02909461036324501, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02519032545387745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025112349539995193, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02317005954682827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022986551746726036, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015140308067202568, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014839914627373219, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014733792282640934, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009379872120916843, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015140308067202568, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015140308067202568, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2325097620487213, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20836219191551208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19907712936401367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1764025092124939, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1067085936665535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09735620766878128, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1250370442867279, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11488907784223557, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10998683422803879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09180055558681488, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08650021255016327, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06389191001653671, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05510886013507843, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051375553011894226, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05047930032014847, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03217354789376259, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02703440934419632, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026714850217103958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02414114400744438, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02353435754776001, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017628075554966927, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017572959885001183, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016459207981824875, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012423846870660782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012423846870660782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012423846870660782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1218518614768982, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11424717307090759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1114024743437767, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10110047459602356, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05682007223367691, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05397096648812294, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06411692500114441, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05916299670934677, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057622358202934265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.051227349787950516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048773545771837234, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03261547163128853, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028294211253523827, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027239566668868065, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026985034346580505, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016315773129463196, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014085567556321621, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01398973073810339, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012961422093212605, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012802965939044952, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008575042709708214, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008678635582327843, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00821690447628498, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005859851837158203, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014085567556321621, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014085567556321621, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09674753993749619, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09068109095096588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08813092112541199, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07995234429836273, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.045046012848615646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.042610809206962585, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05120226368308067, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047384537756443024, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.045684173703193665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04058392345905304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03862893581390381, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025963926687836647, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022623896598815918, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02155657857656479, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021301494911313057, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01297134067863226, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01108875684440136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010977967642247677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010187292471528053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010026331059634686, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006781911943107843, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006781033705919981, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006446167826652527, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0044375150464475155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01297134067863226, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01297134067863226, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23536475002765656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.221400648355484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2165663242340088, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1969345510005951, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11037105321884155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10516510158777237, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12376037985086441, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11407263576984406, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11184178292751312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09962984919548035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09478434920310974, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06289169937372208, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054488398134708405, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052758507430553436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05235015228390694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03138053044676781, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02689700946211815, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026748647913336754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024695493280887604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02442692220211029, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016242152079939842, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015929967164993286, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015608692541718483, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010019022971391678, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015608692541718483, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015608692541718483, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2146003097295761, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19038225710391998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18249791860580444, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15350309014320374, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09894023835659027, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08995507657527924, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1152300164103508, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10493699461221695, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10204386711120605, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08080001920461655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07335027307271957, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05932265520095825, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05113137140870094, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0484406016767025, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04778224229812622, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030232228338718414, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02658938243985176, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02642061933875084, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023253027349710464, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022857720032334328, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01728052832186222, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018480712547898293, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016490021720528603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014627157710492611, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014627157710492611, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014627157710492611, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1614486128091812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15208281576633453, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14911577105522156, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13598467409610748, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07585880160331726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07253158837556839, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08429743349552155, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07794266194105148, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07686646282672882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06871313601732254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06551343947649002, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04297863692045212, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03741279989480972, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03646747022867203, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036246463656425476, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021502016112208366, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01908293552696705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01902022212743759, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01767074130475521, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017534440383315086, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011501346714794636, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011998089030385017, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011195859871804714, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008550404570996761, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011501346714794636, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011501346714794636, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21791291236877441, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2054225504398346, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2015647143125534, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18374380469322205, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10209748148918152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09771154075860977, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11327753216028214, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10470039397478104, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10342603921890259, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09247623383998871, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08812712877988815, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05756595730781555, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04996493458747864, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04878552258014679, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04850493371486664, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02875630557537079, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02487112767994404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024795683100819588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022891735658049583, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02271563932299614, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014992862939834595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014654891565442085, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014596696943044662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009267251007258892, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014992862939834595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014992862939834595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22575663030147552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20136506855487823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19157862663269043, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16941803693771362, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10331787168979645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09365814924240112, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12226693332195282, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11219093948602676, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10670483857393265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08844597637653351, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08336219936609268, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06246786192059517, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.053782690316438675, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.049747541546821594, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04877358675003052, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031441401690244675, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026191474869847298, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02584085613489151, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02329917810857296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022656334564089775, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01717558689415455, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017100822180509567, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01588607206940651, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012085446156561375, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012085446156561375, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012085446156561375, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11863381415605545, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11144153773784637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10869687050580978, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0987510085105896, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055271122604608536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05252788960933685, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06245896592736244, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.057606495916843414, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056020911782979965, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04994640871882439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04765330255031586, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03174055740237236, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027531765401363373, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026463789865374565, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02621360495686531, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01585904136300087, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01360171940177679, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013501115143299103, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012524861842393875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012367073446512222, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008281851187348366, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008266258984804153, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007928559556603432, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005423617083579302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01360171940177679, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01360171940177679, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0948738306760788, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08908629417419434, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08650548756122589, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07857156544923782, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.044140394777059555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04177345708012581, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.050306107848882675, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.046552520245313644, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04475481063127518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03985022008419037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.037987612187862396, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025499749928712845, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022210322320461273, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02112901397049427, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020870449021458626, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012745918706059456, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010880131274461746, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010764900594949722, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010012405924499035, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009849694557487965, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006669242400676012, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006673474796116352, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006331778597086668, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004380489699542522, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012745918706059456, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012745918706059456, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22943666577339172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21609169244766235, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21157203614711761, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19258123636245728, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10749461501836777, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10255926102399826, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1203833669424057, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11094798892736435, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1089048758149147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09722073376178741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09262143075466156, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06115426868200302, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05296453461050987, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05136212706565857, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05099218338727951, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03051793947815895, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02616170048713684, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0260335560888052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024056900292634964, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023814130574464798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01577828638255596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015447953715920448, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015183521434664726, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009677964262664318, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015447953715920448, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015183521434664726, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21430464088916779, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1909434050321579, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18341532349586487, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15089735388755798, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10033378005027771, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09207728505134583, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11496128141880035, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10545824468135834, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10274717956781387, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08176206052303314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07134168595075607, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05921497941017151, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051244717091321945, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04895223304629326, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.048398613929748535, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029835132881999016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02645067125558853, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02629290707409382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02295857109129429, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022609084844589233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01656571961939335, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017877886071801186, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01588614284992218, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013758767396211624, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013758767396211624, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013758767396211624, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16817474365234375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1585509479045868, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1555594503879547, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14187780022621155, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07906081527471542, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07565069943666458, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08775165677070618, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.081142358481884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08007246255874634, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07165145128965378, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06835117191076279, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.044707659631967545, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03891313821077347, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03797835484147072, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03776269033551216, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022401364520192146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019800158217549324, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01973685994744301, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018329061567783356, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01818971149623394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011980942450463772, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012334081344306469, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011682012118399143, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008668314665555954, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011980942450463772, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011980942450463772, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22538641095161438, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21261276304721832, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20870742201805115, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19044163823127747, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10564397275447845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10117338597774506, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11725731939077377, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10828520357608795, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10700739175081253, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09577878564596176, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09135383367538452, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05950024351477623, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.051686763763427734, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05048713833093643, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05021422728896141, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029791558161377907, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02577093429863453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025692351162433624, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023743774741888046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023564646020531654, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01564842090010643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01520655956119299, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015257911756634712, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009675450623035431, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01564842090010643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01520655956119299, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.229674831032753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2040916085243225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19381290674209595, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17120911180973053, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10516653209924698, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09495057910680771, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12410863488912582, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11439771205186844, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10865157842636108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08966439217329025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08442167192697525, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06332170218229294, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05505693703889847, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050788819789886475, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049735717475414276, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031853437423706055, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02695629373192787, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026567768305540085, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023974180221557617, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023284895345568657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01714414730668068, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01789231039583683, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015751194208860397, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012918990105390549, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012918990105390549, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012918990105390549, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11929770559072495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11219025403261185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10944517701864243, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09959162026643753, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055673133581876755, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052934788167476654, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06285827606916428, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05797616392374039, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05641641467809677, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05033959448337555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048052772879600525, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031934142112731934, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027705341577529907, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026655135676264763, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026402993127703667, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015961719676852226, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013725418597459793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01362534612417221, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012647482566535473, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0124895591288805, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008364703506231308, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00836791843175888, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008019722066819668, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005527167581021786, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013725418597459793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013725418597459793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09827547520399094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09239369630813599, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08990512043237686, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08176378160715103, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.045822873711586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.043452441692352295, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05202934890985489, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04808623343706131, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.046426594257354736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04140152409672737, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03950624167919159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02637816034257412, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02294905111193657, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021919013932347298, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02166924998164177, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013178892433643341, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011256352998316288, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011154220439493656, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010365763679146767, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010211859829723835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006878571584820747, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006854130886495113, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006553004961460829, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004458482377231121, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013178892433643341, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013178892433643341, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2327301800251007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21924027800559998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21481317281723022, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19565704464912415, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1090347021818161, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1041683778166771, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12181441485881805, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11229456961154938, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11045902967453003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09868761152029037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09407089650630951, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061833687126636505, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05357838422060013, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05207677558064461, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05171623453497887, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030840769410133362, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026455461978912354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026338253170251846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024326415732502937, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024094335734844208, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015882249921560287, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01550137996673584, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015318810939788818, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009578724391758442, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01550137996673584, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015318810939788818, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2241036742925644, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19306664168834686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18299412727355957, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15270037949085236, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10393228381872177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09199348092079163, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12053027004003525, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11056652665138245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10737114399671555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08309124410152435, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07362592965364456, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.062196243554353714, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05396429821848869, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05096805840730667, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05026174709200859, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031602248549461365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028073187917470932, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02787749283015728, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024287909269332886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023844385519623756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01810535416007042, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019637465476989746, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017225775867700577, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015606832690536976, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015606832690536976, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015606832690536976, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17452964186668396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16459199786186218, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16146817803382874, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14733463525772095, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08203282207250595, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07850024849176407, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09085635840892792, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08413945138454437, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0830819383263588, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07437913119792938, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07089733332395554, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.046312373131513596, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04032173752784729, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03936489671468735, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039142414927482605, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023125797510147095, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020436592400074005, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020372534170746803, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018906572833657265, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018765242770314217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012216451577842236, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012610022909939289, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011908037588000298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008729871362447739, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012216451577842236, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012216451577842236, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22913062572479248, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2161797434091568, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21222369372844696, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1936916559934616, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10741913318634033, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10293152183294296, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11883817613124847, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11006175726652145, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10880892723798752, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09746832400560379, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09280562400817871, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06032632663846016, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0525064691901207, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051315855234861374, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051029305905103683, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030087530612945557, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026124509051442146, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02604502998292446, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02406701073050499, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02388683892786503, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015543855726718903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015321687795221806, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015142902731895447, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009607849642634392, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015543855726718903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015142902731895447, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23282597959041595, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2063758224248886, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19559097290039062, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17254436016082764, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10641995072364807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09582389891147614, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12647928297519684, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11624164134263992, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1100715920329094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09047282487154007, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0851692408323288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06432832032442093, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05566512420773506, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051263824105262756, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.050167061388492584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032247524708509445, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026940232142806053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026540957391262054, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02383190020918846, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02310209907591343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017309201881289482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017585787922143936, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01588541828095913, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012305778451263905, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012305778451263905, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012305778451263905, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11826781928539276, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11114151775836945, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10837125033140182, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09855827689170837, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05512615665793419, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052388932555913925, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.062345024198293686, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05751236528158188, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05586191266775131, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049843549728393555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047605834901332855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03169474005699158, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027496472001075745, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026414738968014717, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026158396154642105, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01586555503308773, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013634268194437027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01353734266012907, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01257123239338398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012413831427693367, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008324390277266502, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008370818570256233, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007971400395035744, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005599253810942173, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013634268194437027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013634268194437027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0985196977853775, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09252485632896423, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08996763080358505, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08179312199354172, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04589581862092018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.043481603264808655, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05231131240725517, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04836048558354378, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04652981087565422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041469771414995193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.039568133652210236, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026518741622567177, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023076355457305908, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021966250613331795, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021702274680137634, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013251131400465965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011290659196674824, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011173807084560394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010395858436822891, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010226059705018997, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006923270877450705, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006897428072988987, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006574657279998064, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004493627697229385, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013251131400465965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013251131400465965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23297908902168274, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2194424569606781, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.214863121509552, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19591739773750305, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10914289206266403, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10422355681657791, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12205169349908829, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11250956356525421, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11056068539619446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09878247231245041, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09416079521179199, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06199568882584572, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05367047339677811, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052124299108982086, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051770444959402084, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03091302514076233, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026521911844611168, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026403583586215973, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02438502572476864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024153517559170723, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015938421711325645, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015589901246130466, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015354393050074577, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00969819724559784, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015589901246130466, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015354393050074577, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20783357322216034, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18215635418891907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17375129461288452, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14392511546611786, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0963950976729393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08674530684947968, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11163836717605591, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10201772302389145, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09947226196527481, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07800869643688202, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06866470724344254, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05744494870305061, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04945499822497368, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04692619666457176, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04631795361638069, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02901652082800865, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02523096837103367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025071974843740463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021739087998867035, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021350353956222534, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01622387394309044, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01695188321173191, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01545137632638216, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012867736630141735, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01545137632638216, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01545137632638216, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17822028696537018, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.168178990483284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16507655382156372, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15071600675582886, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0837734192609787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0802110955119133, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09299536049365997, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08591847121715546, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08485238254070282, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07600525766611099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07261441648006439, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04738051816821098, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04120752960443497, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04024173691868782, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04001413285732269, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02374184876680374, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02097717486321926, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020910007879137993, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019434962421655655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019291870296001434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012733843177556992, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013067751191556454, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012420067563652992, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009185193106532097, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012733843177556992, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012733843177556992, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22826401889324188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21542681753635406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21147359907627106, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19304701685905457, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10704470425844193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10253428667783737, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11863832920789719, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10962136089801788, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10837838053703308, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.097093865275383, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09260574728250504, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060262974351644516, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05233599618077278, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05115655064582825, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05087482929229736, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03014986589550972, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02610253170132637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02602263353765011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02405780367553234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023882072418928146, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015810677781701088, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015387555584311485, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015428343787789345, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009776477701961994, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015387555584311485, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015387555584311485, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23025958240032196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20303986966609955, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19158422946929932, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16890014708042145, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10507895797491074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09402281790971756, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1256009042263031, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11566789448261261, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10889340937137604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08893170952796936, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0837533175945282, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06397056579589844, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055363476276397705, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05059757083654404, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04941236227750778, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03203196078538895, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026551879942417145, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026117615401744843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023395182564854622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02261168882250786, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017167866230010986, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01734716258943081, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015653446316719055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012042693793773651, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015653446316719055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015653446316719055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1140889897942543, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10724538564682007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10456347465515137, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0951174646615982, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05313635990023613, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.050496794283390045, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06014399230480194, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.055514857172966, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.053867727518081665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04806572198867798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04591457173228264, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030531957745552063, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02649461105465889, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025418410077691078, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025163188576698303, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01525547169148922, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013048039749264717, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012940289452672005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012022921815514565, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011858450248837471, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007976582273840904, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007914985530078411, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007624846883118153, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005145109258592129, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01525547169148922, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01525547169148922, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09565360099077225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08986705541610718, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08734433352947235, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07945568859577179, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04450799152255058, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04214787483215332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05077829957008362, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04693063348531723, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.045133642852306366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04024757444858551, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03842679411172867, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02575792372226715, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022403977811336517, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02130916900932789, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021037040278315544, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012868466787040234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010949249379336834, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010835201479494572, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010085477493703365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009919623844325542, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006723690312355757, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006695483811199665, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006373236887156963, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004359712358564138, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012868466787040234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012868466787040234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22553522884845734, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21233826875686646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20785099267959595, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18929672241210938, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10537274181842804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10054770857095718, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11808023601770401, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10890369862318039, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10675108432769775, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09539908915758133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09097975492477417, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05992536246776581, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05191212520003319, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05032947659492493, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049944210797548294, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029885681346058846, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02561230957508087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025478314608335495, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023548245429992676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023305751383304596, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015437851659953594, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015086371451616287, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014856265857815742, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009402950294315815, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015437851659953594, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015086371451616287, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21236105263233185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1816667914390564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.171450674533844, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14341731369495392, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09804601222276688, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08649525046348572, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11503831297159195, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10525330901145935, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10182351619005203, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07723347097635269, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07051139324903488, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05926041677594185, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.050996169447898865, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.047785770148038864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046990178525447845, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029985470697283745, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025795582681894302, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025592369958758354, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021894091740250587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021384909749031067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016878431662917137, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01754879392683506, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015918653458356857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013361524790525436, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013361524790525436, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013361524790525436, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17942465841770172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16937652230262756, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1662169247865677, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1518816351890564, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08424624055624008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08073072880506516, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09349705278873444, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08639731258153915, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08532790094614029, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07647513598203659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07301405072212219, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04764527827501297, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041412971913814545, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040456466376781464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04022597894072533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023867670446634293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02103506028652191, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02096867933869362, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019483113661408424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019342023879289627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012754262425005436, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013012772426009178, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012443738989531994, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009052099660038948, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012754262425005436, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012754262425005436, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2255493551492691, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2128925323486328, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20910406112670898, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1909853219985962, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10567443072795868, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10128092765808105, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11709191650152206, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10825192183256149, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1070096418261528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09593993425369263, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09156649559736252, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05943937227129936, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.051677193492650986, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05051806569099426, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0502474345266819, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029700255021452904, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02581534907221794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025730576366186142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023813173174858093, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023638131096959114, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015523111447691917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015280152671039104, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015136713162064552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009776479564607143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015523111447691917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015136713162064552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23012210428714752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2021659016609192, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19038976728916168, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16757218539714813, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10491860657930374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09364768862724304, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12683536112308502, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11583441495895386, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10887707769870758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08862534165382385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08357112854719162, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06448858976364136, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0556950569152832, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05076742544770241, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04953894764184952, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032432883977890015, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027066607028245926, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02661939524114132, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023964257910847664, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023162156343460083, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01769528165459633, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01823021098971367, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016121797263622284, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013256838545203209, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013256838545203209, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013256838545203209, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1167672872543335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10975220799446106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10695912688970566, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09737245738506317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05440245568752289, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05170023813843727, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0615299791097641, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.056774869561195374, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05513281002640724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04919830337166786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047027476131916046, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03126739710569382, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027126701548695564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02603444829583168, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025772014632821083, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015632281079888344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013402231968939304, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013302858918905258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012359032407402992, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012202308513224125, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008200590498745441, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008185043931007385, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007847657427191734, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005409185774624348, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015632281079888344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015632281079888344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09649484604597092, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09068259596824646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0880727469921112, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08014623820781708, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04488298296928406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04246840253472328, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05141633003950119, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04750138521194458, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04550687596201897, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04060405120253563, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03878308832645416, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026054752990603447, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022671494632959366, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021495111286640167, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0212103184312582, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01303551159799099, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011072052642703056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010945864953100681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010211716406047344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010028974153101444, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006840532645583153, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006817750632762909, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006472757551819086, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004479952156543732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01303551159799099, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01303551159799099, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23505344986915588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22144868969917297, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21700212359428406, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19776643812656403, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11001387238502502, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10510358959436417, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12295330315828323, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11330755054950714, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11145646870136261, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0996459349989891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09497039020061493, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06242421269416809, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054033197462558746, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05251085013151169, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05216594412922859, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031108923256397247, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026687659323215485, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02656319923698902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024539316073060036, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02430862933397293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01603776589035988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015629569068551064, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015467995777726173, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009657450020313263, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015629569068551064, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015467995777726173, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1921083927154541, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16458040475845337, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1557546854019165, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12872812151908875, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08818475902080536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07840065658092499, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10294634103775024, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09430938959121704, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09155179560184479, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07030350714921951, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06152147799730301, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05289188399910927, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04614689573645592, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04338369518518448, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04270389676094055, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026889150962233543, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024065785109996796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023912716656923294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020855367183685303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020443866029381752, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015437697060406208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017083274200558662, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014616092666983604, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013728156685829163, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015437697060406208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015437697060406208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17254282534122467, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1629609614610672, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15995225310325623, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14614319801330566, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08105569332838058, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07766713201999664, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08980792760848999, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08307033777236938, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08207815885543823, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07362806051969528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07023156434297562, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04572661966085434, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039763715118169785, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03884860500693321, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03863370418548584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022846495732665062, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0201072059571743, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020044049248099327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018612058833241463, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018477164208889008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012063887901604176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012302517890930176, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011764167807996273, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008401891216635704, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012063887901604176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012063887901604176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21871379017829895, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2066463977098465, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20290346443653107, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18544863164424896, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10255548357963562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09827066957950592, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11344324052333832, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10498805344104767, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1038142740726471, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09316244721412659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08884027600288391, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.057593606412410736, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05007869005203247, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04896854981780052, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04869658499956131, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02872176095843315, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024911053478717804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024838820099830627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022975919768214226, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02280794270336628, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014854569919407368, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014590286649763584, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014481987804174423, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009126259945333004, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014854569919407368, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014854569919407368, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22059814631938934, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19280856847763062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18095842003822327, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15948189795017242, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10020095109939575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08889483660459518, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12136876583099365, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11127787828445435, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10417386144399643, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0843401849269867, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0798516720533371, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06188656762242317, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.053370069712400436, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04837343841791153, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04713451862335205, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031122924759984016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02561257965862751, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025156734511256218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02256220206618309, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021738100796937943, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016961244866251945, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01709326170384884, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015354481525719166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012160031124949455, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015354481525719166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015354481525719166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12035864591598511, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11353747546672821, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.110953189432621, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1012335866689682, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05620183050632477, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05356840416789055, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06311281025409698, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.058395761996507645, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056930333375930786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05099955201148987, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04868712276220322, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0320090651512146, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027848731726408005, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02686181664466858, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02662861905992031, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015992147848010063, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723660260438919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01363377459347248, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012660132721066475, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012509594671428204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008322209119796753, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008203382603824139, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00799679197371006, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0052336775697767735, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723660260438919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723660260438919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10411060601472855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09813208878040314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09575202316045761, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08737374097108841, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04861678555607796, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.046282801777124405, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05500536039471626, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05074317380785942, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04926247149705887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.044114649295806885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.042199328541755676, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027901824563741684, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024202842265367508, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023234520107507706, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02300763688981533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01392379030585289, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011877389624714851, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011783725582063198, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010958234779536724, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010810588486492634, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007233100943267345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007125925738364458, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006917855702340603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004536100197583437, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01392379030585289, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01392379030585289, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23395349085330963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2209339141845703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21671679615974426, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1979547142982483, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10959061235189438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10491713881492615, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12192854285240173, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11259607970714569, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11100628972053528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09953315556049347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09496860206127167, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06186913698911667, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05367538705468178, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05230552703142166, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05197872221469879, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030857430770993233, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02655627578496933, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02645743265748024, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02448202483355999, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0242724921554327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015884997323155403, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015513104386627674, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015379313379526138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00956171564757824, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015513104386627674, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015379313379526138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2067519575357437, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1685662716627121, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15535949170589447, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12121881544589996, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09409978985786438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07954682409763336, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1123976781964302, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1031169444322586, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09899814426898956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06759953498840332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.061471231281757355, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05777427554130554, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.049818772822618484, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04572780057787895, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04472823068499565, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029073631390929222, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024508167058229446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024243107065558434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019527804106473923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018816908821463585, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016042185947299004, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016585366800427437, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014779685065150261, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01227651722729206, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014779685065150261, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014779685065150261, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.180776447057724, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1708047240972519, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16770173609256744, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1533554494380951, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08492494374513626, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08140409737825394, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09405873715877533, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08699561655521393, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08597135543823242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07720528542995453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07369109243154526, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0478944331407547, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0416361503303051, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04069036617875099, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04046986997127533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023934975266456604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02102932706475258, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0209672674536705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019473135471343994, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01933140493929386, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012629021890461445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012824445962905884, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012322412803769112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008704818785190582, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012629021890461445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012629021890461445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2251274585723877, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2127707302570343, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20903605222702026, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19118750095367432, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10550235211849213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10113883763551712, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11700330674648285, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10800231248140335, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10681276768445969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09595086425542831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09171248972415924, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05943547934293747, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05154036730527878, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05040278285741806, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05014064162969589, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029717931523919106, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025719907134771347, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025641389191150665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023758968338370323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023588942363858223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01558676641434431, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015166480094194412, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015209239907562733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00964205153286457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01558676641434431, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015166480094194412, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22270214557647705, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19505900144577026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18247371912002563, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1610974371433258, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1013394445180893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08962170779705048, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12367186695337296, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11351695656776428, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10539914667606354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0855879932641983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08096331357955933, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06289941072463989, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.054481539875268936, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04897378757596016, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04759305715560913, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03155461698770523, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025966014713048935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025438200682401657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022929832339286804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022027557715773582, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01703675091266632, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01743421144783497, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01530099380761385, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012391201220452785, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01530099380761385, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01530099380761385, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11825110763311386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11135740578174591, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10872351378202438, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09912790358066559, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05520705133676529, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0525301918387413, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06214766204357147, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05747060850262642, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05593074485659599, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.050002261996269226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04773101583123207, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03152981400489807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027430718764662743, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026383785530924797, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026136361062526703, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015751365572214127, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013513332232832909, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01341375894844532, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01245624665170908, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012298809364438057, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00821093749254942, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008129989728331566, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007864488288760185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005229176953434944, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013513332232832909, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013513332232832909, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09912967681884766, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.093365877866745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09094903618097305, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08294123411178589, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.046273138374090195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04392436891794205, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05267058685421944, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04858918488025665, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04688301682472229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041939277201890945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04014863073825836, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026704132556915283, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023178672417998314, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022135451436042786, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021883424371480942, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013346475549042225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01133626140654087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011231024749577045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010458837263286114, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010304354131221771, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00695749232545495, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006857678294181824, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006618022918701172, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004403062164783478, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013346475549042225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013346475549042225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23267331719398499, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21937373280525208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21510963141918182, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1964021623134613, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10893981903791428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1041727066040039, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1213655173778534, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11203712970018387, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11030109226703644, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09879463911056519, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09429065138101578, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06156586483120918, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05341225489974022, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05199846625328064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05166265368461609, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0307100061327219, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026417819783091545, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0263034887611866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024326568469405174, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024111421778798103, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01580093987286091, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015450266189873219, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015274327248334885, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00953450333327055, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015450266189873219, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015274327248334885, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18947511911392212, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15043067932128906, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13572180271148682, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11009286344051361, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08607134222984314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07006927579641342, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10460060089826584, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09610606729984283, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0904378592967987, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06101410835981369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0573405995965004, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05393369495868683, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04654344916343689, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04202381893992424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04090655967593193, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02719985321164131, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022814422845840454, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022477589547634125, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01825406774878502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01747242361307144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015126476064324379, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015932591632008553, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013766231946647167, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012067340314388275, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015126476064324379, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015126476064324379, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18459069728851318, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1744854748249054, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17132355272769928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1567678600549698, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08672915399074554, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08314106613397598, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09597855806350708, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08879867196083069, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08778280764818192, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07884585857391357, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07528755068778992, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048849351704120636, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.042468272149562836, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04153069853782654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04130478948354721, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02440032735466957, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02143864706158638, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021377595141530037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01985197514295578, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019715970382094383, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012857591733336449, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013039613142609596, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012553201988339424, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008814584463834763, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012857591733336449, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012857591733336449, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2267567217350006, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2143452763557434, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2105807363986969, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19267725944519043, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10627245903015137, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10193148255348206, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11750705540180206, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10877732932567596, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10756547003984451, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0966711938381195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09227393567562103, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05966634675860405, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05186764895915985, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.050734762102365494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05046854913234711, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029754813760519028, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025798341259360313, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025723233819007874, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02381664514541626, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0236468818038702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015400753356516361, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015076644718647003, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015028377994894981, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009403948672115803, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015400753356516361, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015028377994894981, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2243480086326599, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.1970827728509903, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18446317315101624, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1633087396621704, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1021050363779068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09054777771234512, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12497635185718536, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11452878266572952, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.106076680123806, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0866403579711914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08212821185588837, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06357378512620926, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05493459478020668, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04938877001404762, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.047995224595069885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03229053318500519, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026256268844008446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025713128969073296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023282933980226517, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022381041198968887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018088318407535553, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017711330205202103, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016436003148555756, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012682637199759483, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012682637199759483, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012682637199759483, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11941072344779968, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11251987516880035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10987227410078049, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10025784373283386, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05576492100954056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05309117212891579, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06312566250562668, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0581248477101326, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0564955472946167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05060107260942459, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0484105683863163, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031990714371204376, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02775450237095356, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026686575263738632, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02642526850104332, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016019480302929878, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013681338168680668, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013579721562564373, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012628260999917984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012467843480408192, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008387383073568344, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008254887536168098, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008029954507946968, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005337625276297331, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013681338168680668, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013681338168680668, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10198905318975449, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09608238190412521, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0936894565820694, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08545544743537903, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.047634582966566086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04530461132526398, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05399448424577713, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04984761402010918, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04826275631785393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04320890083909035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04130803421139717, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027375683188438416, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023781834170222282, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022782325744628906, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02254890277981758, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013673052191734314, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011643961071968079, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011546263471245766, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01073853112757206, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010589521378278732, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0070919468998909, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006991587579250336, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006767278537154198, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00443815253674984, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013673052191734314, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013673052191734314, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2286880910396576, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2157660871744156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2115895301103592, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19314457476139069, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1070861965417862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10242398828268051, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11929365247488022, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11010856926441193, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10842600464820862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0971827432513237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09274128824472427, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060521673411130905, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05251378193497658, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0511217936873436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050800107419490814, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030184365808963776, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025952985510230064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025853503495454788, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023910557851195335, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023704199120402336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015524528920650482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01517638098448515, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015010674484074116, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009358295239508152, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015524528920650482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015010674484074116, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19536259770393372, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15586614608764648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1430925875902176, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12471206486225128, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08708679676055908, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0719870775938034, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1052478775382042, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09479910135269165, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09198594093322754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0665653795003891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06003033369779587, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.053801726549863815, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04636954516172409, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.042936403304338455, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04209145903587341, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02796521969139576, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02390674129128456, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02373833768069744, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020271899178624153, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019735250622034073, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016768453642725945, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017123844474554062, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015837235376238823, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013721234165132046, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013721234165132046, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013721234165132046, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1917884200811386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18136504292488098, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1781114637851715, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16296032071113586, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09012613445520401, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0864165872335434, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09981239587068558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09229744970798492, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09123089164495468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08193575590848923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07828444987535477, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05083385482430458, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04413895308971405, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043163444846868515, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04292905330657959, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025394847616553307, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022240031510591507, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022173887118697166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02058909460902214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020437443628907204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013387427665293217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01345944032073021, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013067272491753101, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009016760624945164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013387427665293217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013387427665293217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2320726215839386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21945294737815857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21558137238025665, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19725735485553741, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10888072848320007, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10442972183227539, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12057961523532867, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11142566055059433, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11020221561193466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09904244542121887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09467953443527222, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06134159490466118, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053204018622636795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05204089730978012, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051774706691503525, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030660128220915794, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026585550978779793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026503169909119606, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02456168457865715, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02439408376812935, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01607581414282322, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015721525996923447, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01569497585296631, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010062916204333305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010062916204333305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010062916204333305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2293412834405899, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20255516469478607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19031964242458344, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16872398555278778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10445168614387512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09319508075714111, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12641336023807526, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11645438522100449, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10834752768278122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08904217183589935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08436398953199387, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06431566923856735, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05566048249602318, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05025281012058258, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04891365393996239, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032229259610176086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026235636323690414, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025714989751577377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02319810725748539, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02230161614716053, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01722993515431881, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017082374542951584, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015516247600317001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011553678661584854, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015516247600317001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015516247600317001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12164775282144547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11458469182252884, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11192958056926727, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10217031836509705, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.056830111891031265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05410263314843178, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06400223076343536, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05914854630827904, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0575840063393116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0515090748667717, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04920772835612297, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03247366473078728, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028226714581251144, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027172023430466652, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026923349127173424, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016216708347201347, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013916628435254097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013818192295730114, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012836558744311333, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012673930265009403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00845189020037651, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008367589674890041, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008109216578304768, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005387276876717806, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013916628435254097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013916628435254097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10297940671443939, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09700930118560791, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09455686807632446, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08631760627031326, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04810350015759468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04571658745408058, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05462462082505226, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05035531148314476, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.048749808222055435, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04363319277763367, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.041747309267520905, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02770073711872101, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024027056992053986, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023016387596726418, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022766267880797386, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013836762867867947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011762474663555622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011662713252007961, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010847017168998718, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010691260918974876, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007185596041381359, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00707236398011446, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006852967664599419, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004494198597967625, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013836762867867947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013836762867867947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2342536300420761, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2208966612815857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21675996482372284, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19797620177268982, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10969207435846329, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10494816303253174, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12205708026885986, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11265797913074493, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11108220368623734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09949414432048798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0949486568570137, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06192069873213768, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05371854454278946, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05234774574637413, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05202107131481171, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030885916203260422, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026573041453957558, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02647203952074051, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024464910849928856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024255875498056412, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01586192287504673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015493418090045452, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015349914319813251, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009517126716673374, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015493418090045452, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015349914319813251, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20193921029567719, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15581060945987701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13877424597740173, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1167660653591156, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08964253216981888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06838749349117279, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11093482375144958, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10213428735733032, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09649115800857544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06630268692970276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05978729575872421, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05698051303625107, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04914366826415062, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04349830746650696, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04208136349916458, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028675967827439308, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023142121732234955, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022778118029236794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018698859959840775, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01768806017935276, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015631547197699547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01576581969857216, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013906188309192657, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01123626809567213, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015631547197699547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015631547197699547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19226914644241333, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18180809915065765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17855660617351532, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16339313983917236, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0903877317905426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08666490018367767, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10016190260648727, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09256777912378311, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09150061011314392, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08220412582159042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0786694586277008, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05101316049695015, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04430675506591797, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043321643024683, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04309278354048729, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02552201971411705, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02239302545785904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022328969091176987, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020739354193210602, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020595161244273186, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013505691662430763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013654503040015697, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01319210510700941, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009275335818529129, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013505691662430763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013505691662430763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2341417521238327, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22135445475578308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21742375195026398, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19901201128959656, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1099221408367157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10544315725564957, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12172488123178482, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11250654608011246, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1112811267375946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09998855739831924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09564251452684402, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06184717267751694, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053742069751024246, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05257324129343033, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05229931324720383, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0308931153267622, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026906747370958328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026828650385141373, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02486969716846943, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024696743115782738, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016184670850634575, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015987129881978035, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015798646956682205, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01032953429967165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01032953429967165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01032953429967165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22719433903694153, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20131295919418335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18874098360538483, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16774782538414001, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10337347537279129, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0922073945403099, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1263229101896286, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11631602793931961, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10729774087667465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08869867771863937, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08425965905189514, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06410897523164749, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05555756390094757, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04974917322397232, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.048307906836271286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0320562943816185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026001939550042152, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02541317418217659, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023111484944820404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022149134427309036, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017021428793668747, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017036907374858856, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015150784514844418, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011495277285575867, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015150784514844418, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015150784514844418, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11973077058792114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11282515525817871, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11009538918733597, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10049764811992645, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055954642593860626, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05326475575566292, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06331001222133636, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05832911282777786, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05671272799372673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05075039342045784, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0485917367041111, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032126668840646744, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0278506837785244, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02677352912724018, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0265125073492527, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016053644940257072, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013693561777472496, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013586794026196003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012624918483197689, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01245800033211708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00835342239588499, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008210232481360435, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00800199992954731, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005233060568571091, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013693561777472496, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013693561777472496, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10321708023548126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09731976687908173, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09492046386003494, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0865989699959755, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048240967094898224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045860495418310165, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.055040497332811356, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.050513606518507004, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04888248071074486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04374375566840172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04203085973858833, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02789793536067009, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02411067858338356, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02309349924325943, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02285235933959484, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013946225866675377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011824041604995728, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011723719537258148, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01090103480964899, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01074935495853424, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0072768814861774445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007110798731446266, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006943459622561932, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045443144626915455, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013946225866675377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013946225866675377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23464836180210114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22157084941864014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21723692119121552, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19846321642398834, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11005671322345734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10527929663658142, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12258961796760559, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11309392750263214, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11146144568920135, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09988705068826675, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09534583985805511, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06226983293890953, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05394429713487625, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05256029963493347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05223315954208374, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031062958762049675, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02677636221051216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0266782958060503, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024672605097293854, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024468213319778442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016050398349761963, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015757840126752853, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015528429299592972, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009891870431602001, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015528429299592972, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015528429299592972, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18578562140464783, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15427839756011963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14201097190380096, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11324711889028549, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08464951813220978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07118292152881622, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10327041149139404, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09433282911777496, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08892054855823517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06432301551103592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05692795664072037, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.053275249898433685, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04590030014514923, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041574444621801376, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040499571710824966, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026891468092799187, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02286890335381031, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02254665456712246, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01919090561568737, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01850571669638157, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015166232362389565, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016276784241199493, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013825410045683384, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01269126869738102, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015166232362389565, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015166232362389565, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19318467378616333, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18259942531585693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17933763563632965, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.164057657122612, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09085331112146378, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08708642423152924, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10056483000516891, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09306363761425018, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09199082106351852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08258432149887085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07889019697904587, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051223210990428925, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.044507771730422974, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04350971430540085, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04327687621116638, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025585701689124107, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022426821291446686, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02235831692814827, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020754586905241013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02060719206929207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013467497192323208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013576536439359188, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013148459605872631, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009096885100007057, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013467497192323208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013467497192323208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23446913063526154, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22166644036769867, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.217748761177063, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19924430549144745, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1099826768040657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10546373575925827, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1217622309923172, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1126137301325798, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11136814951896667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1000451073050499, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09558878093957901, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06181574612855911, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053730111569166183, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05255019664764404, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0522671714425087, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030866187065839767, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02675410732626915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02667991816997528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024701561778783798, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024526380002498627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01604786328971386, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015698816627264023, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015660976991057396, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009859762154519558, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009859762154519558, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009859762154519558, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22662562131881714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2009958177804947, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18873745203018188, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.168113112449646, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1033577099442482, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0923825278878212, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1258518546819687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11593905836343765, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1072470024228096, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08894555270671844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08453544974327087, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06427305936813354, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055618952959775925, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04988447204232216, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.048461973667144775, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03239501640200615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026317385956645012, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025756951421499252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023512795567512512, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022595994174480438, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01754896529018879, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017521966248750687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015719451010227203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01225636713206768, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01225636713206768, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01225636713206768, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12925873696804047, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12182336300611496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11909577995538712, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10869532078504562, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06046822667121887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05763576924800873, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06784837692975998, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06279627978801727, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06127694621682167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05482606217265129, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05231916159391403, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.034384679049253464, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02994585782289505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028903363272547722, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028645917773246765, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017158428207039833, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014736163429915905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014642342925071716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013568335212767124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01341375894844532, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008880717679858208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008753065019845963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008531732484698296, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005512421950697899, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014736163429915905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014736163429915905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1097167432308197, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10339714586734772, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10100352764129639, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09219054132699966, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05134494602680206, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04889284074306488, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05803639069199562, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05350654572248459, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05203389748930931, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046573590487241745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.044569533318281174, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029449008405208588, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02552657388150692, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024561507627367973, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024323174729943275, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014705886133015156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01253464911133051, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012444364838302135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011556306853890419, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011410810053348541, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007624843157827854, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007477511651813984, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007303727325052023, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004718224983662367, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014705886133015156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014705886133015156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23875725269317627, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2253197431564331, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22107014060020447, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20185524225234985, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11189030110836029, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10708945989608765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12439281493425369, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11488878726959229, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11335739493370056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10149366408586502, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09691418707370758, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0631529688835144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054790470749139786, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05342789366841316, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053106412291526794, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03149190917611122, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027133328840136528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027033833786845207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02497508004307747, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02476656436920166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016213510185480118, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01583394967019558, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015719115734100342, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009769441559910774, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009769441559910774, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009769441559910774, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17630678415298462, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13591337203979492, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1190049946308136, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09658197313547134, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07750015705823898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0618070587515831, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09937594830989838, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09153516590595245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08413855731487274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05574783310294151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.053022369742393494, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05107784643769264, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.043962955474853516, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03759090602397919, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03592517971992493, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02563338354229927, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019890960305929184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.019384261220693588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015730610117316246, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014534354209899902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013871630653738976, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013683722354471684, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01186087355017662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009425354190170765, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014534354209899902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014534354209899902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19335821270942688, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1827067732810974, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1794157177209854, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16408446431159973, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09096994251012802, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08714224398136139, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10077463835477829, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0931776687502861, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0921066626906395, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08263599872589111, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07886786758899689, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051315341144800186, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04456127807497978, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043560516089200974, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.043321263045072556, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02564515918493271, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022402405738830566, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022339623421430588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020714767277240753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020566556602716446, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013509055599570274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013508787378668785, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013181773014366627, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008982345461845398, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013509055599570274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013509055599570274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23564352095127106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22264066338539124, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21871797740459442, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2000848352909088, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11061049997806549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10599828511476517, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12234195321798325, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11324501037597656, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11197346448898315, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10048612207174301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09594186395406723, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06216531619429588, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054025281220674515, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05283027142286301, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052541207522153854, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030998073518276215, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026870176196098328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026787638664245605, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024786947295069695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024599717929959297, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016030337661504745, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015715941786766052, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015636075288057327, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009794099256396294, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015636075288057327, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015636075288057327, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2258296012878418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20109206438064575, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1890491247177124, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16858072578907013, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10305501520633698, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09231284260749817, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12534099817276, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1155516505241394, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10678531229496002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08887965232133865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08447764068841934, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06363298743963242, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055178795009851456, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0495469830930233, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.048136644065380096, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031844764947891235, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025773227214813232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02519679069519043, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022955019026994705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02202865108847618, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016861500218510628, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016708675771951675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015037667006254196, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011098488233983517, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015037667006254196, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015037667006254196, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12642526626586914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11908510327339172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1162932962179184, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10604224354028702, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05919995903968811, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05632304027676582, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06672109663486481, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06163909658789635, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05998027324676514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.053621627390384674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05116968974471092, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03385477513074875, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02944258786737919, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028325727209448814, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028057366609573364, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016917314380407333, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014505526050925255, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014398004859685898, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013360155746340752, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01319375354796648, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008820394985377789, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008723986335098743, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008454019203782082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005606541875749826, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014505526050925255, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014505526050925255, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10716887563467026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10087031871080399, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09842032939195633, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08981204777956009, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0501590333878994, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04766390845179558, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05680765211582184, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.052382517606019974, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05081220343708992, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045435186475515366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04344587028026581, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02881760336458683, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024991489946842194, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02398681454360485, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02373402938246727, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014396358281373978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01225191354751587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012153802439570427, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011280319653451443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011130369268357754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007472326513379812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007335019297897816, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007131348364055157, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004643112421035767, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014396358281373978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014396358281373978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23384687304496765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22032169997692108, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21603211760520935, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19727589190006256, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10953739285469055, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10472945123910904, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12195351719856262, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11259293556213379, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11097497493028641, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09930089861154556, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0947045311331749, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06191710755228996, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05367505922913551, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05228760093450546, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051948364824056625, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030848905444145203, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026523299515247345, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026425829157233238, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0243963822722435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024182718247175217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015858376398682594, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0154632069170475, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015338366851210594, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009484795853495598, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0154632069170475, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015338366851210594, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1908651441335678, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13758602738380432, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1158333420753479, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09174464643001556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08012054860591888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05679891258478165, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10745342075824738, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09750332683324814, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08997350186109543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0558989942073822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05420171096920967, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05450029298663139, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.047328732907772064, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03959563747048378, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03756376728415489, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027761785313487053, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022056400775909424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0215877965092659, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01781480759382248, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01644136570394039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01565464772284031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016494615003466606, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013353744521737099, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012687545269727707, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01565464772284031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01565464772284031, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19498980045318604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18411797285079956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18077898025512695, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16529572010040283, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09182059019804001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08794264495372772, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10181804746389389, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09405338764190674, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09295958280563354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08336102217435837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0796140655875206, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05184946209192276, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.045044101774692535, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04402221366763115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04377736523747444, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025991247966885567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02274230495095253, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022671958431601524, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021031111478805542, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020878929644823074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013848412781953812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013843154534697533, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013511264696717262, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009365294128656387, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013848412781953812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013848412781953812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23796366155147552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22480209171772003, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22077101469039917, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20193256437778473, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11178091168403625, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10714159905910492, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12395095825195312, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11449597030878067, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11321800202131271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10153433680534363, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0969938263297081, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06296521425247192, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05470640957355499, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.053477976471185684, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05319000408053398, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031517744064331055, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027330175042152405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027246424928307533, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02522238716483116, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025033853948116302, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01656813733279705, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01618698053061962, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016171040013432503, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010371634736657143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010371634736657143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010371634736657143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23052917420864105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20645250380039215, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19433681666851044, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17354890704154968, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10562673956155777, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09491699188947678, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12823635339736938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11824630945920944, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10918724536895752, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0916290283203125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08706055581569672, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06523310393095016, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05681280046701431, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051049716770648956, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04961446300148964, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032780200242996216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02703574113547802, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026452641934156418, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024322574958205223, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023412657901644707, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017483869567513466, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018107635900378227, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015608038753271103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012838700786232948, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015608038753271103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015608038753271103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12971478700637817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12222728878259659, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11951971799135208, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1090998500585556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.060794733464717865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05794605240225792, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06815622746944427, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06305409967899323, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06158679723739624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05506725609302521, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05254749581217766, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03454721346497536, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030078720301389694, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029047654941678047, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02881273254752159, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017238792032003403, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014801019802689552, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014707960188388824, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013616259209811687, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013462129049003124, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00890316441655159, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008765854872763157, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008557828143239021, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005484649445861578, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014801019802689552, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014801019802689552, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1111396923661232, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10471481829881668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10229069739580154, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09330575913190842, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05206859111785889, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04960139840841293, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.058708544820547104, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.054141685366630554, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05275944992899895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04717462137341499, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.045095294713974, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029801707714796066, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025836316868662834, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024897610768675804, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02467270940542221, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014878802932798862, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012690994888544083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012602584436535835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011683803051710129, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01154533214867115, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0077051459811627865, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007539329119026661, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007389433681964874, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004731907043606043, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014878802932798862, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014878802932798862, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23883609473705292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2253577709197998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22104091942310333, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20177516341209412, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1120312288403511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10715458542108536, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1245182454586029, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11502440273761749, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1134767234325409, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10153622925281525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09686223417520523, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06317874789237976, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05484973266720772, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053481973707675934, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053163010627031326, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03151126578450203, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027123427018523216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02702067419886589, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024932969361543655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02472977340221405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01618252694606781, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015759408473968506, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015681294724345207, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009624870494008064, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009624870494008064, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009624870494008064, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2040657103061676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14721529185771942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12108694761991501, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0990721732378006, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0859975591301918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06493185460567474, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11826105415821075, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10875631123781204, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09794741868972778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.061765361577272415, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.060137614607810974, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061029884964227676, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05258423462510109, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04224381223320961, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03944741562008858, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0307607501745224, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023157676681876183, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02240944467484951, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018788959830999374, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016884395852684975, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01699678972363472, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01729504019021988, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013880090788006783, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012664670124650002, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013880090788006783, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013880090788006783, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19532057642936707, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18438604474067688, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1810099333524704, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1654449999332428, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09194069355726242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08805437386035919, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10198985040187836, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0942290872335434, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09312064945697784, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08340922743082047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07966681569814682, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05200672522187233, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04508465155959129, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04405958205461502, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04380899667739868, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02600834146142006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02272523008286953, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02265815995633602, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020999446511268616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020843198522925377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013782015070319176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013780714012682438, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013454067520797253, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009262030944228172, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013782015070319176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013782015070319176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2383173704147339, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2250421643257141, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2209397703409195, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2019495815038681, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11192820966243744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10721282660961151, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12391632050275803, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11466703563928604, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11335347592830658, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10159517824649811, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09689400345087051, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06295929104089737, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05472955107688904, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.053478240966796875, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053190406411886215, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03142053633928299, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02722085826098919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0271425973623991, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025096558034420013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024904053658246994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016312329098582268, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015967726707458496, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015903543680906296, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010004359297454357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010004359297454357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010004359297454357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22661487758159637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20327551662921906, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1909986138343811, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17050175368785858, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10378743708133698, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09321609139442444, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.128197580575943, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11708611994981766, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10725262016057968, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09020637720823288, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08608072251081467, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06528066843748093, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05609378218650818, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050075799226760864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04857994616031647, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.033135831356048584, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026357358321547508, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025709107518196106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023706790059804916, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022723734378814697, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018370943143963814, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017523599788546562, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016461536288261414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012099622748792171, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012099622748792171, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012099622748792171, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13467630743980408, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12698043882846832, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1241367980837822, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11325322836637497, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06316322088241577, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0601801760494709, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07093625515699387, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0655471682548523, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06398177891969681, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.057177912443876266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.054571494460105896, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.035988546907901764, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03130142763257027, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03020109049975872, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029937226325273514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017980417236685753, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015423227101564407, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015318498946726322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01419681403785944, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01403226237744093, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009334664791822433, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009185084141790867, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008965446613729, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005816007498651743, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015423227101564407, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015318498946726322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11521710455417633, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10858093202114105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10603804886341095, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09674137085676193, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05403821915388107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05145246163010597, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06099905073642731, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.056208763271570206, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.054756224155426025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04895181953907013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046837761998176575, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030979324132204056, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02682381309568882, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025855956599116325, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025615110993385315, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01547978725284338, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01320427656173706, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013113181106746197, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0121581656858325, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012009896337985992, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008045338094234467, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007872683927416801, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0077196448110044, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004982744809240103, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01547978725284338, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01547978725284338, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24172767996788025, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22794415056705475, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22358033061027527, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20405344665050507, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11337227374315262, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1085289716720581, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12602603435516357, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1163729876279831, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11486649513244629, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1027376726269722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09801220893859863, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06397823244333267, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055492084473371506, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05412682145833969, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05380556359887123, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03188805654644966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027473464608192444, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027379341423511505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025259187445044518, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025058524683117867, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016403092071413994, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016007445752620697, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015900198370218277, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009841794148087502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009841794148087502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009841794148087502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21824140846729279, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16320094466209412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13943207263946533, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11257971078157425, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09681306034326553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07380323857069016, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12487667798995972, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11455632746219635, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10470530390739441, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0638500526547432, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0643637552857399, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06418248265981674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05516679957509041, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04714705049991608, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04509282484650612, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03218857944011688, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02532792277634144, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0246772151440382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019277891144156456, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01773093268275261, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017437171190977097, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017854031175374985, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014935212209820747, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012850964441895485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014935212209820747, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014935212209820747, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1904001086950302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1797698736190796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17645947635173798, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16126340627670288, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08966562896966934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08583974838256836, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09933575987815857, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09190331399440765, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09081120043992996, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0813496857881546, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07761455327272415, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05061757564544678, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.043972380459308624, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04296181723475456, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04272785782814026, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02530544623732567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022129129618406296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02206127718091011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020444363355636597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020293401554226875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013334888964891434, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013389193452894688, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012999149039387703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008949115872383118, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013334888964891434, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013334888964891434, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23807403445243835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22479559481143951, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22066368162631989, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20165209472179413, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11190308630466461, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10717322677373886, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12400007992982864, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11462166160345078, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11331641674041748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.101530060172081, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09687349945306778, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06309732049703598, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054746564477682114, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.053501665592193604, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05321669951081276, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031491219997406006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027320152148604393, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027231009677052498, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025182895362377167, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02499312162399292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016434483230113983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016137463971972466, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016026383265852928, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010277168825268745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010277168825268745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010277168825268745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22841796278953552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20504027605056763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19312016665935516, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1727021038532257, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10462910681962967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09429504722356796, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1274598091840744, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11742302775382996, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10814650356769562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09113040566444397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08662254363298416, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06468741595745087, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056223753839731216, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050461072474718094, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04901585355401039, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03248601406812668, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026507003232836723, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025903142988681793, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02385672740638256, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02292393520474434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017410140484571457, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017509447410702705, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015587319619953632, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012062238529324532, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015587319619953632, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015587319619953632, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12975329160690308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12212473899126053, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11929887533187866, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1087859570980072, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06080949679017067, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.057896096259355545, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06827069818973541, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06316947937011719, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06162474676966667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05500989407300949, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05239670351147652, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0345991887152195, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030131394043564796, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029070930555462837, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028817525133490562, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017283014953136444, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014829862862825394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014734199270606041, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01363044697791338, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013470558449625969, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008956436067819595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00880874041467905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00860157422721386, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005542714148759842, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014829862862825394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014829862862825394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11010733246803284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10360824316740036, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10116507112979889, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09221935272216797, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051583025604486465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04907795786857605, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05824318900704384, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.053711023181676865, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05228489264845848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046707089990377426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.044584039598703384, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029559487476944923, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02564118802547455, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02468087710440159, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024454014375805855, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014763187617063522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012609091587364674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012517370283603668, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01159781962633133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011452395468950272, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007673502899706364, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007529526948928833, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007357013877481222, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004767830949276686, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014763187617063522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014763187617063522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24248072504997253, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22847038507461548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22414296865463257, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20448307693004608, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11377733945846558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10876195877790451, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12649983167648315, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11680522561073303, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11527887731790543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10298031568527222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09813813865184784, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06416000425815582, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055711157619953156, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05432179570198059, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05401581898331642, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032001033425331116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027548501268029213, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02745049260556698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02530461549758911, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025098586454987526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016429530456662178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016018889844417572, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01591794192790985, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009779904969036579, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009779904969036579, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009779904969036579, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18596072494983673, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1392035335302353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11751357465982437, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10245901346206665, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08428731560707092, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0642489567399025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10852072387933731, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09941872209310532, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0893055871129036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.059143152087926865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0567881278693676, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.056085918098688126, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04791863262653351, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.040924109518527985, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03914478421211243, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0280148908495903, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.021776139736175537, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021074926480650902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017025813460350037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015718936920166016, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015013016760349274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015106437727808952, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012715299613773823, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010561391711235046, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015013016760349274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015013016760349274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18818479776382446, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1775306761264801, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17415931820869446, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1590150147676468, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08860904723405838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08480265736579895, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09817271679639816, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09086115658283234, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08973456919193268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08029232919216156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07652221620082855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04999380186200142, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04349232465028763, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04248230531811714, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04223727807402611, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025001175701618195, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02190544083714485, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021839898079633713, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02022228203713894, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02007436752319336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013165400363504887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013294818811118603, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012835601344704628, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008933689445257187, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013165400363504887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013165400363504887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23755048215389252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22415199875831604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22003693878650665, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20087632536888123, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11160802841186523, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10684502869844437, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12354584783315659, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11434867978096008, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.113021619617939, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10116815567016602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0964319258928299, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06276330351829529, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05458267405629158, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05332612991333008, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05303628742694855, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03129737079143524, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027140159159898758, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02706167846918106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024987421929836273, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02479623630642891, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016177961602807045, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015907159075140953, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01576194539666176, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009951220825314522, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009951220825314522, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009951220825314522, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23063001036643982, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20682916045188904, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1943751722574234, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17377790808677673, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10564731806516647, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09496551752090454, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1310010403394699, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11917916685342789, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10932641476392746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09200862795114517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08769772201776505, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06624966859817505, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05719226971268654, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051082611083984375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049554817378520966, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.033495236188173294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027071848511695862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026440324261784554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024431137368083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023458188399672508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01837834157049656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018206613138318062, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01644800789654255, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012877393513917923, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012877393513917923, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012877393513917923, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1316300332546234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12378174811601639, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12094909697771072, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11019133776426315, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06167389452457428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.058675944805145264, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06931968033313751, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0641196072101593, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.062496837228536606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05572608485817909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05308234319090843, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.035144176334142685, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030591078102588654, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029488440603017807, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02921939454972744, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017546702176332474, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015049443580210209, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014943892136216164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01382722333073616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013660960830748081, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0090813422575593, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008967297151684761, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008709742687642574, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005660414230078459, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015049443580210209, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015049443580210209, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11066450923681259, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1041194498538971, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10158850997686386, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09259964525699615, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05189003422856331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0493224561214447, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05873847007751465, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.054123684763908386, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05260675773024559, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04691850021481514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04484381526708603, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0298234224319458, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025834962725639343, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02482304908335209, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02457551658153534, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0148951206356287, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012681642547249794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012583581730723381, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011662893928587437, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01150604709982872, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007735108025372028, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0075845373794436455, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007399122696369886, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0048021916300058365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0148951206356287, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0148951206356287, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24776603281497955, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23338668048381805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22884568572044373, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20870277285575867, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11628358066082001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11114861816167831, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1293257474899292, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1193845346570015, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11787078529596329, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10519715398550034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10022842884063721, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06564664095640182, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056962285190820694, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.055532798171043396, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05518767237663269, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03273238241672516, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028196129947900772, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02808992564678192, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025883972644805908, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025666221976280212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01683577336370945, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016437944024801254, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01631075330078602, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010103171691298485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010103171691298485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010103171691298485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20204085111618042, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14640450477600098, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11976540088653564, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09767348319292068, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08496534824371338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06263522058725357, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1188955008983612, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10891303420066833, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09673264622688293, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.059611979871988297, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.060488492250442505, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06133345514535904, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05239180475473404, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04138646647334099, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03837209939956665, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03067430667579174, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022121572867035866, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02123202197253704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01736689917743206, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015159486792981625, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01643621362745762, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016133205965161324, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012912877835333347, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010950866155326366, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015159486792981625, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015159486792981625, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18456794321537018, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17404915392398834, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17079244554042816, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15595583617687225, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08691726624965668, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08316131681203842, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09637906402349472, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08914586156606674, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08807051926851273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0787118449807167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07510094344615936, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04911739379167557, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04267497360706329, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04167149215936661, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.041433535516262054, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024556798860430717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021498462185263634, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021432926878333092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019839584827423096, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019693754613399506, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012965373694896698, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013058232143521309, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012633384205400944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008791353553533554, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012965373694896698, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012965373694896698, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23512619733810425, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.221748948097229, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21767660975456238, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1987030804157257, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11056028306484222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10580457746982574, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12242061644792557, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1132887527346611, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11198274046182632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10015254467725754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09549969434738159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06231869012117386, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05410835146903992, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052863262593746185, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05255637317895889, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03107699751853943, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0269489549100399, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02686789259314537, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024804623797535896, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024617541581392288, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016156762838363647, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01587529480457306, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015742307528853416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010043679736554623, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010043679736554623, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010043679736554623, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22299160063266754, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19996874034404755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18765977025032043, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1679213047027588, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10205098241567612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09159542620182037, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1257721483707428, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11567043513059616, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10566052794456482, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08893644064664841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08475270867347717, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0640631690621376, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05535602569580078, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04922698438167572, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04768265783786774, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03228766843676567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025853855535387993, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025203322991728783, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023281605914235115, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02228703536093235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01752563752233982, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.0171586275100708, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015581903979182243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01175005454570055, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015581903979182243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015581903979182243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1266811192035675, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11916613578796387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11630252003669739, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10594823211431503, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.059396933764219284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05647329241037369, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06696394830942154, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.061875421553850174, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06021504104137421, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05368978902697563, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05115243047475815, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03399011865258217, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0295438040047884, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028410639613866806, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028143959119915962, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016974780708551407, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014511432498693466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014400612562894821, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01332961954176426, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013157655484974384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008802777156233788, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008667592890560627, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008432368747889996, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005482440814375877, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014511432498693466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014511432498693466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10817748308181763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1017284169793129, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09923205524682999, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09038098156452179, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05067359283566475, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04818045720458031, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0572592094540596, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05285857990384102, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05139071121811867, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04581984132528305, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043704804033041, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029072463512420654, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025231359526515007, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024247953668236732, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02401875890791416, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014516601338982582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012388762086629868, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01229759119451046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011389215476810932, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011238264851272106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007536695338785648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007413665298372507, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007210783660411835, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004702826030552387, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014516601338982582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014516601338982582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24008280038833618, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2261432558298111, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22172600030899048, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.202132910490036, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11264369636774063, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10767549276351929, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12526555359363556, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11575093865394592, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.114165760576725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10190115123987198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0970216691493988, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06356392800807953, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0552172027528286, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053813133388757706, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05348135530948639, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03170348331332207, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027285562828183174, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02718500979244709, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025049297139048576, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02483314648270607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016274936497211456, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015861257910728455, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015759766101837158, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009677359834313393, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009677359834313393, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009677359834313393, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1883629560470581, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14599600434303284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12689094245433807, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1032489538192749, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08351753652095795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06408779323101044, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11025868356227875, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09950514137744904, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08985689282417297, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.060447461903095245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.057967204600572586, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05618520826101303, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0483572855591774, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041147563606500626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0392889641225338, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028581222519278526, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022937776520848274, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022325776517391205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018902061507105827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017698079347610474, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016413047909736633, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01703829877078533, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014344624243676662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01322196051478386, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014344624243676662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014344624243676662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18191063404083252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17151682078838348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16824640333652496, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1535244584083557, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08572150766849518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08198793232440948, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09506171196699142, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08792954683303833, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08684773743152618, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07761374115943909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07397200912237167, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04847020283341408, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04210282862186432, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04110017418861389, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04086485877633095, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024222150444984436, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021217823028564453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02115347608923912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019582444801926613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019431067630648613, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012777147814631462, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012909946963191032, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012448291294276714, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00870900135487318, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012777147814631462, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012777147814631462, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2333211898803711, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22001537680625916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21592889726161957, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19705639779567719, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1097780093550682, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10500648617744446, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12188281118869781, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1125466525554657, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11119064688682556, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0994241014122963, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09476318210363388, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062016457319259644, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0537882074713707, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0525287389755249, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052228767424821854, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030993647873401642, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026863452047109604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026786278933286667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024739086627960205, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02454971894621849, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01627974770963192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015951041132211685, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015864873304963112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010273153893649578, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010273153893649578, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010273153893649578, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23125720024108887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20731501281261444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19504894316196442, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1742541491985321, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10594042390584946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0952315628528595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13020122051239014, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11921466886997223, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1096280962228775, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0921502560377121, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0876619964838028, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06607712805271149, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05710276588797569, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05108489841222763, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04955944046378136, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03353404626250267, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026782911270856857, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02615581639111042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024072134867310524, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02309250459074974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018502231687307358, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017672287300229073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016661154106259346, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01207818929105997, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01207818929105997, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01207818929105997, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12861411273479462, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1208692193031311, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11794501543045044, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10732971876859665, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06028839945793152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.057250477373600006, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06787911057472229, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06281646341085434, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06110946834087372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0543658621609211, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05174098536372185, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.034408003091812134, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029977256432175636, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02884933352470398, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028568081557750702, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017182733863592148, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014740203507244587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0146316047757864, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013525951653718948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013356311246752739, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008917239494621754, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008817272260785103, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008544581942260265, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005603593308478594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014740203507244587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014740203507244587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.107152558863163, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10073255747556686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09818178415298462, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08934157341718674, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05027061328291893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04769875109195709, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056965552270412445, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05258599668741226, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05097727105021477, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045367296785116196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043279532343149185, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028947971761226654, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025119595229625702, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02406596951186657, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023822421208024025, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014465157873928547, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012321248650550842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012218627147376537, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011318044736981392, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011159481480717659, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007535313721746206, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00742869870737195, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007191765587776899, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004752773325890303, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014465157873928547, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014465157873928547, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24441245198249817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22998149693012238, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22546188533306122, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20544975996017456, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11476331204175949, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10960014909505844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12765656411647797, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11790849268436432, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11629350483417511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10368193686008453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09864966571331024, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06478599458932877, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05624300613999367, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054805051535367966, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05446527153253555, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03231244906783104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02780347689986229, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027696993201971054, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02549799345433712, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025275949388742447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016594350337982178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016186635941267014, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016053931787610054, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009895019233226776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009895019233226776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009895019233226776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19261258840560913, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14959749579429626, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13247516751289368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0993957668542862, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08633997291326523, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06850409507751465, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10724664479494095, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09886887669563293, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09205595403909683, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05914273113012314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05521628260612488, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05488117039203644, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04739642143249512, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041704680770635605, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04027068242430687, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02744300290942192, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02191007323563099, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021448230370879173, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016698064282536507, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015594291500747204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014564081095159054, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014653951860964298, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012741957791149616, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009986422024667263, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015594291500747204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015594291500747204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17965291440486908, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16927355527877808, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16604651510715485, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.151448592543602, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08466571569442749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08093395829200745, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09384626150131226, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08685589581727982, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08579044044017792, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07660511136054993, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07296894490718842, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04784102737903595, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041592516005039215, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04059438407421112, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04036158323287964, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02389862760901451, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02096560038626194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02089584991335869, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01933569461107254, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01918831840157509, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01258432399481535, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012768338434398174, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012257445603609085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008629201911389828, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01258432399481535, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01258432399481535, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22504116594791412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2121695727109909, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20816916227340698, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18986114859580994, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10588856786489487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10128609836101532, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11743797361850739, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10858256369829178, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10729759186506271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09584953635931015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09124752879142761, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05976405739784241, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05189249664545059, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.050676602870225906, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.050385426729917526, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029845135286450386, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025902284309267998, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025824081152677536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023840855807065964, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023660793900489807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015589388087391853, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015367492102086544, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01518333237618208, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009867722168564796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015589388087391853, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01518333237618208, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22245842218399048, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19820022583007812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18597197532653809, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16605640947818756, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1015043556690216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09092501550912857, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12466425448656082, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11473019421100616, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10551486164331436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08799733966588974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08374647796154022, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06340024620294571, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05488625541329384, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04894557595252991, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04745227098464966, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031802333891391754, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02567588910460472, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02508031204342842, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023019392043352127, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022058973088860512, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01704840362071991, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016971949487924576, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01515984907746315, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01159740425646305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01515984907746315, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01515984907746315, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12444379180669785, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11702074110507965, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11418507248163223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1038743406534195, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05833101272583008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05542483925819397, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06567898392677307, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06080024316906929, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05914395675063133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052629806101322174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05006062984466553, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.033317212015390396, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02902555838227272, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027906615287065506, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027640998363494873, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016637273132801056, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014256083406507969, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01415332779288292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013089025393128395, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01291677076369524, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008635947480797768, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008528555743396282, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008266880176961422, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0054085999727249146, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014256083406507969, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014256083406507969, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1033477708697319, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09713122248649597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09472320228815079, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08615525811910629, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0484316423535347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045987166464328766, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05485132709145546, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.050627369433641434, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04910547286272049, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.043728720396757126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04167107492685318, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027834484353661537, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02416837587952614, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023174172267317772, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022947445511817932, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013906428590416908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011865639127790928, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011773282662034035, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010900704190135002, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010751535184681416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007242195308208466, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007137789390981197, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0069173406809568405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004569135140627623, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013906428590416908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013906428590416908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2415202260017395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2275058627128601, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2229648232460022, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20308950543403625, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11347310990095139, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10842534154653549, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12615209817886353, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11655110120773315, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1149708554148674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10253893584012985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09754012525081635, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06403467059135437, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055611446499824524, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0541924349963665, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053871553391218185, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031950242817401886, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027494024485349655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02739502303302288, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025226809084415436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025011779740452766, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016417499631643295, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016006752848625183, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015901843085885048, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009803567081689835, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009803567081689835, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009803567081689835, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1820167601108551, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14886324107646942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13760331273078918, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10651319473981857, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08237391710281372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07010714709758759, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09864809364080429, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09057596325874329, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08711037784814835, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06104833632707596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05239402875304222, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.050653260201215744, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04354051500558853, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03984157368540764, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.038925185799598694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025362662971019745, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02099655382335186, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.020771944895386696, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0168879684060812, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01623958721756935, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013693522661924362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013834496960043907, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012532858178019524, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009756042622029781, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013693522661924362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013693522661924362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17726996541023254, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.167044997215271, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16382034122943878, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14941418170928955, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08354980498552322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07986921817064285, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09262232482433319, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.085713230073452, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08466678857803345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07557953894138336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07194764912128448, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04716115817427635, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04103608429431915, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040048275142908096, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03981107100844383, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023568177595734596, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020642630755901337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02057594805955887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01902666501700878, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018878325819969177, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012379257939755917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012510815635323524, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012054945342242718, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008382570929825306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012379257939755917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012379257939755917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2175205945968628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20498423278331757, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20112833380699158, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1834028661251068, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1023772731423378, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09787485003471375, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11345186829566956, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10498305410146713, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10371337831020355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09259193390607834, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08820655196905136, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05770297348499298, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05013246834278107, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04894731566309929, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04866034537553787, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028788354247808456, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024953164160251617, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02487281709909439, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022934982553124428, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022757479920983315, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014942185953259468, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014694263227283955, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0145461056381464, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009277084842324257, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014942185953259468, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014942185953259468, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22296026349067688, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19696615636348724, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18476274609565735, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16503478586673737, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10130342096090317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09038122743368149, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12403116375207901, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11422891914844513, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10576644539833069, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08744281530380249, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08314908295869827, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06301487237215042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05460971221327782, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0487835556268692, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.047322824597358704, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031570471823215485, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025454597547650337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02489623613655567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022708024829626083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02174323797225952, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016754524782299995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016646765172481537, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014874324202537537, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011151948012411594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014874324202537537, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014874324202537537, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11973598599433899, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11237038671970367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10944391041994095, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0995141938328743, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.056053102016448975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05312791466712952, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0633932426571846, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.058675363659858704, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05685151740908623, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.050516773015260696, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04805779084563255, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032159510999917984, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028014741837978363, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026818279176950455, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026534682139754295, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016075706109404564, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723784126341343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013611926697194576, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01258906815201044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012410617433488369, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008361796848475933, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008255845867097378, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007979683578014374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005260406993329525, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723784126341343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013723784126341343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09730343520641327, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09128578007221222, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0888211578130722, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08072982728481293, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04553623124957085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04310624673962593, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05175723880529404, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047870032489299774, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.046195466071367264, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041036996990442276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03905694559216499, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026258544996380806, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022847658023238182, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021803582087159157, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021552035585045815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013120625168085098, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011170810088515282, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0110636530444026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010250107385218143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01009137462824583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006841689348220825, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00676138186827302, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006504086311906576, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004333841614425182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013120625168085098, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013120625168085098, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23664738237857819, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2226274609565735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2181582748889923, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1987123191356659, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11112318187952042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10609669983386993, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12381504476070404, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11425674706697464, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11264973878860474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10034079849720001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09548412263393402, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0629030391573906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054524101316928864, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053099844604730606, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05276798456907272, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03135986253619194, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026997612789273262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026891300454735756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024754298850893974, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02453894168138504, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016140855848789215, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01580231636762619, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01561540924012661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009787339717149734, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01561540924012661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01561540924012661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18023985624313354, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1425650715827942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1291586309671402, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10032641142606735, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0812513455748558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06695907562971115, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09900949895381927, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09059752523899078, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08678919076919556, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05712403729557991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05176466330885887, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0510379858314991, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04369253292679787, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03943532705307007, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03837277740240097, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02566256746649742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02098318748176098, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.020738013088703156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016420086845755577, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01564781554043293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013965531252324581, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014161239378154278, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01259977463632822, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010200290009379387, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01564781554043293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01564781554043293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16959819197654724, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15973249077796936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15662819147109985, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14278759062290192, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07989166676998138, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07635606825351715, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08862211555242538, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08202056586742401, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08097606897354126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07223491370677948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06876573711633682, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04517607390880585, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039261769503355026, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03829888254404068, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03807634860277176, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02257171832025051, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019770227372646332, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01970386505126953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018214881420135498, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018072864040732384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011882662773132324, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012024796567857265, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011565454304218292, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008091188967227936, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011882662773132324, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011882662773132324, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20364969968795776, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19184616208076477, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18821054697036743, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17156293988227844, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0958058312535286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09160676598548889, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1062573492527008, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09829919785261154, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09708794951438904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08665726333856583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08248837292194366, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05407198145985603, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.046952154487371445, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.045821502804756165, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04555731639266014, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.026984386146068573, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023377904668450356, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023304101079702377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021487150341272354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02131613716483116, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014033898711204529, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013788416050374508, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013656596653163433, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008734533563256264, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014033898711204529, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014033898711204529, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21618297696113586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.18933194875717163, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.17692962288856506, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15782369673252106, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.0981074869632721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08685066550970078, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12123117595911026, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11087516695261002, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1026579737663269, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08396100997924805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07991816103458405, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06162773445248604, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0531773716211319, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.047462183982133865, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04602177068591118, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030966244637966156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025147799402475357, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02462022937834263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02241307869553566, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021478647366166115, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016717689111828804, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016923408955335617, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014853237196803093, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01194140687584877, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014853237196803093, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014853237196803093, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11178873479366302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10482224822044373, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10200511664152145, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09260449558496475, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0522625669836998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04947050288319588, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05937054753303528, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05481429770588875, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.053003113716840744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04704668000340462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04478373005986214, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030149586498737335, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026196395978331566, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02502608485519886, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024753110483288765, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015080902725458145, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012873723171651363, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012760961428284645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01181759499013424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011643391102552414, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007912716828286648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00786127895116806, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007540141697973013, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005158350802958012, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015080902725458145, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015080902725458145, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09400661289691925, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08817408978939056, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08563174307346344, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07776447385549545, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04394623264670372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04153391346335411, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05022728815674782, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04637929052114487, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.044563520699739456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03959416598081589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0377143956720829, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025497961789369583, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02215430699288845, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02105727232992649, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020795119926333427, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012758747674524784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010830093175172806, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010718763805925846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009945710189640522, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009778762236237526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006696614436805248, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00663002859801054, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006342983804643154, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004324819892644882, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012758747674524784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012758747674524784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22255530953407288, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2091771811246872, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20492124557495117, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18627609312534332, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10442258417606354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09960779547691345, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1164863258600235, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10750127583742142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10584389418363571, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09414902329444885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08952245116233826, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059200625866651535, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051282986998558044, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04989194497466087, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04956304281949997, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029524533078074455, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025383982807397842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025287160649895668, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0232620257884264, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023050688207149506, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01524311862885952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014914136379957199, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014741115272045135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009283594787120819, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01524311862885952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01524311862885952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12002889811992645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10326747596263885, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09567616879940033, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07769699394702911, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055475085973739624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04820000007748604, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0685974732041359, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.062426161020994186, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05696577578783035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04326571151614189, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03932991996407509, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03522350639104843, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030792294070124626, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027732238173484802, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026992343366146088, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01796039566397667, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016074571758508682, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0157603919506073, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01402800902724266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01358105894178152, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010473336093127728, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.012235256843268871, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009578174911439419, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01022335235029459, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01402800902724266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01402800902724266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16073788702487946, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15128538012504578, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1483633816242218, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13519755005836487, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07569096237421036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07228736579418182, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08409243822097778, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07773469388484955, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0767260417342186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06844320893287659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06517179310321808, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04291520640254021, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03726569190621376, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.036332231014966965, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036107949912548065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021465446799993515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018828894942998886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018764223903417587, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01736985146999359, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017234010621905327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011394664645195007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011574680916965008, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011084931902587414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007937584072351456, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011394664645195007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011394664645195007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18905222415924072, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17800651490688324, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17458221316337585, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15916481614112854, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08894917368888855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08498556166887283, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0987902358174324, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09133515506982803, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09015920013189316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08042580634355545, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07653776556253433, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0503447987139225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04365934804081917, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.042586132884025574, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042329978197813034, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025138404220342636, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021818194538354874, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021744895726442337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020070604979991913, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019905654713511467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013186950236558914, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013017648831009865, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012830655090510845, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008440003730356693, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013186950236558914, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013186950236558914, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2090531885623932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.1816970556974411, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.17003843188285828, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15183110535144806, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09493529796600342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.083793044090271, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11648617684841156, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10624366998672485, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.0994565337896347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08063696324825287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07673592865467072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05951269343495369, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0511590838432312, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04606444388628006, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04479798674583435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030100340023636818, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024673672392964363, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.024240590631961823, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.021955139935016632, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021141525357961655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0166921503841877, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016834866255521774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015116887167096138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012311779893934727, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015116887167096138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015116887167096138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10933876037597656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10270211845636368, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09998389333486557, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09084180742502213, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05118207260966301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048490896821022034, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05839700996875763, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05373752862215042, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05189114809036255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046164605766534805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.044092122465372086, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029657676815986633, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025686297565698624, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024532675743103027, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02426185831427574, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014859016053378582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01261498685926199, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012498477473855019, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011593800969421864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011417718604207039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007815098389983177, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007692386396229267, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007451863959431648, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0050191027112305164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014859016053378582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014859016053378582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09027672559022903, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08477318286895752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08235728740692139, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07483018934726715, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04224760830402374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03993561491370201, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.048342738300561905, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0446554534137249, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04284553602337837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03808680176734924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03627679497003555, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02453717030584812, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021327901631593704, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020250229164958, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019987337291240692, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01228710450232029, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01041458360850811, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010299037210643291, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009565811604261398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00940305832773447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006457902491092682, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006379372905939817, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006108459085226059, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004156715702265501, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01228710450232029, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01228710450232029, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22854529321193695, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21498499810695648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21064579486846924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19171521067619324, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1073080375790596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10246016830205917, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11946430057287216, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11025959253311157, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10876588523387909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09683068096637726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09209276735782623, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06066859886050224, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05261655151844025, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05126679316163063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05094362050294876, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03026648238301277, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02608657069504261, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025993360206484795, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023914538323879242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023713894188404083, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015589645132422447, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015308867208659649, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01508971955627203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009528438560664654, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015589645132422447, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01508971955627203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1345375031232834, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11169203370809555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10216592997312546, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08184927701950073, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06035898998379707, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05224823206663132, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0760149136185646, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06958377361297607, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06440974771976471, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04648662731051445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04192350059747696, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03919261321425438, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03392001986503601, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02985658496618271, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028825057670474052, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01991783082485199, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01675155758857727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.016444966197013855, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014321665279567242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013679280877113342, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011336779221892357, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.012393549084663391, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010141255334019661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009860522113740444, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014321665279567242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014321665279567242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1336803287267685, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1258753538131714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.12335581332445145, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1124463602900505, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.06326310336589813, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.060438670217990875, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07023631036281586, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.06497331708669662, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.06412457674741745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05724042281508446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.05459938570857048, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03623806685209274, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03172891214489937, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.030965711921453476, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.030778346583247185, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.018272824585437775, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017056066542863846, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017007526010274887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.015949873253703117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.015848107635974884, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010362613946199417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011808331124484539, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010131003335118294, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009518058970570564, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010362613946199417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010362613946199417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1515822410583496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14272701740264893, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.13994258642196655, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12782356142997742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07173623889684677, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06855063140392303, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07986018061637878, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07364307343959808, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07271125167608261, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06495294719934464, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.062043581157922745, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04120704531669617, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03596898540854454, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.035116370767354965, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.034917451441287994, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02085869386792183, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019370976835489273, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019313843920826912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01812463067471981, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018005801364779472, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011955120600759983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013410939835011959, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011697732843458652, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010829640552401543, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011955120600759983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011955120600759983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.15652161836624146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.13439396023750305, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1254788190126419, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.11151937395334244, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.0709870457649231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.06219358369708061, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.08797427266836166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.07945400476455688, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.07442279160022736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.059812020510435104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.057103563100099564, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.045107923448085785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.03883045166730881, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.03496745973825455, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.03404736891388893, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.02315242402255535, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01957758516073227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.019234711304306984, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01757388934493065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.01696249470114708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.013637298718094826, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01424270961433649, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.012357688508927822, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011310583911836147, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.013637298718094826, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.013637298718094826, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10310821235179901, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09676172584295273, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09402769058942795, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0855177640914917, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04813633859157562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045520998537540436, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.055354245007038116, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.050794605165719986, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04886089265346527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04344790801405907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04159194231033325, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028105536475777626, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024267200380563736, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023058274760842323, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022773023694753647, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014064598828554153, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011863067746162415, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011740465648472309, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010916761122643948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010732308961451054, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00740261934697628, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007268137764185667, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007013336289674044, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00475698197260499, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014064598828554153, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014064598828554153, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08053074032068253, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07559973746538162, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07334744185209274, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06663903594017029, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03759574145078659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03546753525733948, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.043269477784633636, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03992148116230965, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.038137856870889664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03391224518418312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03234225511550903, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.021970976144075394, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019081657752394676, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018025444820523262, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.017775405198335648, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0110047347843647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009304617531597614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009191269986331463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008559172041714191, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008399336598813534, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005818168632686138, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005762959364801645, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005479338113218546, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003807955887168646, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0110047347843647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0110047347843647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21350771188735962, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20066337287425995, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1965872347354889, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17903339862823486, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10025198757648468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09560483694076538, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11327316612005234, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10323713719844818, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10162556916475296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09065651893615723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08671537786722183, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.057681161910295486, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04954686015844345, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04809310659766197, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04771940037608147, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029184598475694656, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02476716972887516, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024643205106258392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02280929684638977, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022574927657842636, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015326760709285736, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014923862181603909, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014752613380551338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009729327633976936, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015326760709285736, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015326760709285736, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10456958413124084, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07672525942325592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06604012846946716, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.059429753571748734, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.046381138265132904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.034836187958717346, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.060174789279699326, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05261130630970001, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049900900572538376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03271136060357094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03255438059568405, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0313204750418663, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02624346874654293, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023504041135311127, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022818898782134056, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016507990658283234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0139192845672369, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013772455044090748, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011726281605660915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011311345733702183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010193143971264362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.010830895975232124, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009413917548954487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009213367477059364, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0139192845672369, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0139192845672369, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.11875869333744049, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1117323487997055, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.10956680774688721, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.09988520294427872, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.05574474856257439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.05324440076947212, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06202537938952446, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.057272110134363174, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.056503523141145706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.050403788685798645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.048075269907712936, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03159602731466293, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.027414929121732712, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.026734312996268272, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.026565948501229286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01580216735601425, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.013840186409652233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.013793252408504486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.012763350270688534, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.012659767642617226, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.008363259956240654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.008485114201903343, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.00813320092856884, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.005790180526673794, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.013840186409652233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.013840186409652233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.11922353506088257, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1122065857052803, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.11002721637487411, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.10034994781017303, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0563276968896389, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.05382771044969559, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06274999678134918, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.057850368320941925, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.05708220601081848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05095374956727028, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.048629265278577805, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03230659291148186, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.02807946689426899, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.027395455166697502, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.027234699577093124, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0162977185100317, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.014815588481724262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.014772791415452957, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.013802962377667427, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.013705243356525898, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.009151780977845192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.009934045374393463, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.008934489451348782, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007727102376520634, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.014815588481724262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.014815588481724262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.06615719199180603, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.0563923642039299, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.05165039747953415, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.045769624412059784, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.030140548944473267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.025833070278167725, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.039676129817962646, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.03499327972531319, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.03169520944356918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.02533142827451229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.024366345256567, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.020537858828902245, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.017411718145012856, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.015265805646777153, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.014711566269397736, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.010659663937985897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.009097772650420666, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.00890104565769434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.008310851640999317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.007998298853635788, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0064046201296150684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.007193233352154493, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.005715166684240103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.006113196723163128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.015265805646777153, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.015265805646777153, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } } ], "last_module_idx": 82, "base_perplexity": 8.391836633786518 }