diff --git "a/8.04bpw/measurement_L3-8B-Stheno-v3.2_exl2_8.04bpw_rpcal_mk2.json" "b/8.04bpw/measurement_L3-8B-Stheno-v3.2_exl2_8.04bpw_rpcal_mk2.json" deleted file mode 100644--- "a/8.04bpw/measurement_L3-8B-Stheno-v3.2_exl2_8.04bpw_rpcal_mk2.json" +++ /dev/null @@ -1,62439 +0,0 @@ -{ - "measurement": { - "model.layers.0.self_attn": [ - { - "accuracy": 0.8804031577892601, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9013232276774943, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9085066812112927, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9428208020981401, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9449676990043372, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.945485987700522, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9626352561172098, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9637914341874421, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967025121441111, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9690954081015661, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9720384643878788, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973667741753161, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9759789332747459, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780040017794818, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868182956706733, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888542857952416, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900459734490141, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935194446879905, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972612112178467, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.0.mlp": [ - { - "accuracy": 0.8349935822188854, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8389760348945856, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.862701608799398, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.870522812474519, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9187108632177114, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9255449851043522, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.935536548960954, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9577516091521829, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9617588648106903, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9588020560331643, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9640255717094988, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789232999319211, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820370672969148, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884610884473659, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892484365263954, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915057097095996, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968163690064102, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.self_attn": [ - { - "accuracy": 0.8575311386957765, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8665239987894893, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8778630066663027, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9202908617444336, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.929060397669673, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9299672557972372, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9506496516987681, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9519448562059551, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9548311368562281, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9568049523513764, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633483877405524, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9660144585650414, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667827137745917, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9697270562173799, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810916053829715, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845879388740286, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853252965258434, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919009707518853, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959888568992028, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.mlp": [ - { - "accuracy": 0.8808759157545865, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8996027326211333, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9085555630736053, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9089674283750355, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620415985118598, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832470030523837, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984708352771122, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925357644679025, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934462936362252, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928256045386661, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936666735447943, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965085684234509, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970461263874313, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976012742990861, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977202253503492, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980728628652287, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991983111285663, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.self_attn": [ - { - "accuracy": 0.9816921867895871, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814434787258506, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842705265036784, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868249624269083, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888317447039299, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989891123957932, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927026267687324, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929516452539247, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935500827559736, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937090086750686, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934559141402133, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939992198778782, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995416173391277, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957733133051079, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974525368597824, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997891857943614, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978322642855346, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991134993069863, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994124494260177, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.mlp": [ - { - "accuracy": 0.9657008931972086, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9665970089845359, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9714757140027359, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9730602194322273, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826456358423457, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840178011800162, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861380633665249, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909474119194783, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917734023474623, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911575628211722, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922467573778704, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954936762223952, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996135081149987, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975476119143423, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977030091540655, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981529067299562, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999336935201427, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.self_attn": [ - { - "accuracy": 0.9561571597587317, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9590552176814526, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9681453909724951, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715386754833162, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794611205579713, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9801677281502634, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885880072251894, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896547779790126, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904004095005803, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906665083835833, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898118214332499, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907992128282785, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930886024376377, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939201180823147, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996208521828521, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968296276347246, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969752437318675, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986723948604777, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992275513795903, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.mlp": [ - { - "accuracy": 0.9535393267869949, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.954771225573495, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9613292459398508, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633742580190301, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764872086234391, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783528147963807, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811113633913919, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878023589844815, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988857152115088, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880322070093825, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894993921625428, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939097560709342, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947689290856943, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966937319841236, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996897506120149, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974764624348609, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991206492559286, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.self_attn": [ - { - "accuracy": 0.9550526740495116, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9592400589026511, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651944248471409, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9707462404621765, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9781212207162753, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786253635538742, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876667494536377, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878356585977599, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886513503734022, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989079468534328, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887854635599069, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897182291024365, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916151034994982, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922426242846996, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954095905413851, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961956042534439, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964055437303614, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982709458126919, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990211132098921, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.mlp": [ - { - "accuracy": 0.9396422866266221, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9414038246031851, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9507630956359208, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9535388224758208, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9696773898322135, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.972165287588723, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9759005096275359, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842821434140205, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856396308168769, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845618393155746, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864943515858613, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921310301870108, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932669747213367, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957165525993332, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959846216079313, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967968690179987, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988527505338425, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.self_attn": [ - { - "accuracy": 0.9528842049185187, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9552474014926702, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9639198447111994, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9702969992067665, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770500097656623, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772878627991304, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871637590113096, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875455237342976, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886113212560304, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891880929935724, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881330067873932, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894920481019653, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914492624811828, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922108871396631, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954259670630563, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960843871813267, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966218197514536, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980975669750478, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990951370709809, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.mlp": [ - { - "accuracy": 0.9284384357742965, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9305623122490942, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9415868357755244, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9448275074828416, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641037087421864, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9670062463264912, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9714367178967223, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981411904329434, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829686041921377, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817012147977948, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839799947221763, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990663125354331, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920129484962672, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949064074316993, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952231214847416, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996153649277403, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986421076100669, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.self_attn": [ - { - "accuracy": 0.9475139633286744, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9490168509073555, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9566685280296952, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667757111601532, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734816446434706, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739702952792868, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854590545874089, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985990549554117, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870283769560046, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877413783106022, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865033138776198, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877944593899883, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896595279569738, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905315818614326, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943996004003566, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952513777534477, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962407663406339, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976325067545986, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990155792984297, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.mlp": [ - { - "accuracy": 0.9195504109375179, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9217977859079838, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9346284922212362, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9382985029369593, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9596188168507069, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.962879559956491, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9679857467999682, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791843325365335, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809028498129919, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794081576401368, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819472333183512, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894938126089983, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909998092916794, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994312558992533, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946324641932733, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957351724151522, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984885012599989, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.self_attn": [ - { - "accuracy": 0.9417967915069312, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9440178882796317, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9523833568673581, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9637554716318846, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970809421967715, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9716403685742989, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839731694664806, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843638855963945, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985800500435289, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864795070025139, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854254755773582, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867450183955953, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883261785143986, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891759186866693, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936774630332366, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945937096490525, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959536735113943, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997267517785076, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989193039946258, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.mlp": [ - { - "accuracy": 0.9142036018893123, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9165080138482153, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9304002174176276, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9343528528697789, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9566700484137982, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9602219515945762, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9658126090653241, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776467359624803, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979530074284412, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778522645356134, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806143344612792, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886730705038644, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903137010405771, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993874095758656, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941995422414038, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954367991012987, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983741533505963, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.self_attn": [ - { - "accuracy": 0.9362645742949098, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9391311269719154, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9459838941693306, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9593297170940787, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967776597244665, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683253519469872, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822204115334898, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826485117664561, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840286793769337, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848792953998782, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837994298432022, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848315723938867, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865470991353504, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877828465541825, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927546908438671, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938409014139324, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995395476318663, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968358880432788, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987672804054455, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.mlp": [ - { - "accuracy": 0.9116545263677835, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9140824261121452, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9279376515187323, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9318535849452019, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9552735548932105, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9590595862828195, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9646168719045818, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769817651249468, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788968710927293, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770466929767281, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800083017908037, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988227516412735, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900060868822038, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936344017914962, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939576972974464, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951563230133615, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983034126562416, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.self_attn": [ - { - "accuracy": 0.9303289782255888, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9310156987048686, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9391094823367894, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.951905615394935, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641405229922384, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9642022126354277, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803242458729073, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802390664117411, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823005399666727, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834371373290196, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816855104872957, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983126234728843, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985178163042292, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865698832436465, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918669369653799, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931870301952586, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946471041766927, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966032917873235, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986040562143899, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.mlp": [ - { - "accuracy": 0.9095869874581695, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9121592021547258, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9262393293902278, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9302122197113931, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9543755226768553, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9581395003478974, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9638042566366494, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764821303542703, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784123100107536, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766609005164355, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795924287755042, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880639953771606, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898021685075946, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935323869867716, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993884691153653, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951113415590953, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998275574624131, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.self_attn": [ - { - "accuracy": 0.9327306388877332, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9355807169340551, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9432510540354997, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9566696253605187, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9660222607199103, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9661303972825408, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811523914104328, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816475231200457, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829183424590155, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838376054540277, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829734418308362, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983756784000434, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857851120177656, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871397400274873, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923768903245218, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934993709321134, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995065207243897, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966409276676131, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986820575650199, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.mlp": [ - { - "accuracy": 0.9079944114200771, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9104806887917221, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.925063893198967, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9292103475891054, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9535096185281873, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9572944291867316, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9632496256381273, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761155518935993, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780592659953982, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762196551309898, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791958070127293, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878492432762869, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896114530856721, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934491635940503, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937729424855206, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950414935301524, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982612261374015, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.self_attn": [ - { - "accuracy": 0.932990285102278, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9347347323782742, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9410897414200008, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9537934218533337, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9658503271639347, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9664164811838418, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814142197137699, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816415782552212, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829412198159844, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984041485353373, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830101719126105, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841031403047964, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858213263796642, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868087944341823, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923304167459719, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935323735408019, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951931815012358, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965958612592658, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987141696256003, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.mlp": [ - { - "accuracy": 0.9055180568248034, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.90810242947191, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9232625076547265, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9275984964333475, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9520573234185576, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9560267913620919, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9622259391471744, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9752441392047331, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772859956137836, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754221753682941, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785045507596806, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874035904067568, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892543788300827, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931822268408723, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935477299732156, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948978661559522, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998207139877195, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.self_attn": [ - { - "accuracy": 0.923155277967453, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9264815915375948, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9369402925949544, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9533197686541826, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9618916679173708, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.962114071007818, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788096104748547, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794937268598005, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808724178001285, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817147718276829, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808973957551643, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821680046152323, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844066562363878, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856260283268057, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915257635875605, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927923533832654, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944195624848362, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962059136887547, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985321225540247, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.mlp": [ - { - "accuracy": 0.9024418513290584, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9051679279655218, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9208563123829663, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9254146534949541, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9503915901295841, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9546018319670111, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9610239511821419, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9742419936228544, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764394595986232, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974590587313287, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778206673217937, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869918533950113, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889009551261552, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992900597571861, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933493112039287, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947977993579116, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981224728326197, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.self_attn": [ - { - "accuracy": 0.9188558696769178, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9209996410645545, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9314938844181597, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9473353563807905, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9590019474271685, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9594525755383074, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777098759077489, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782159213209525, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796644668094814, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808237176621333, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797580317826942, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811360915191472, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832646925933659, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844350725179538, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909655580995604, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922273439879064, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942540965857916, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995959560124902, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984761958403396, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.mlp": [ - { - "accuracy": 0.8992252601310611, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.902040334418416, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9181497837416828, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9227616903372109, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9490662240423262, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9532559919171035, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.959764378843829, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736005538143218, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758163000224158, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739580008899793, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772348734550178, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866969469585456, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886298846104182, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927600687951781, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932124225306325, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946316350542475, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998075806957786, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.self_attn": [ - { - "accuracy": 0.9191241371445358, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9212238104082644, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9304740871302783, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9460823473054916, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9596853868570179, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9597862958908081, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774377535795793, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779833917273208, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979389455053024, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980617037625052, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9798125787638128, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811012696009129, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829263399587944, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844319049734622, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990761403052602, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921916346356738, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941169966477901, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958513068268076, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998443302421947, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.mlp": [ - { - "accuracy": 0.8924462357535958, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8955350825563073, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9127827491611242, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.917919397354126, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9455687068402767, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9500870581250638, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9571438839193434, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9716538501670584, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974037628271617, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.972185313818045, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.975684650358744, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857757730060257, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878526878310367, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922134951630142, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992750525008887, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942905490461271, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979256452497793, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.self_attn": [ - { - "accuracy": 0.9138010963797569, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9168393285945058, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9280363977886736, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.94497224339284, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9566227693576366, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9572304931934923, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757850869791582, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761714577907696, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978632174897939, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797707895049825, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978409459348768, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9801268721930683, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822484292089939, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838571097934619, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904317008913495, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919080999679863, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993770915287314, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995888728502905, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983691137458663, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.mlp": [ - { - "accuracy": 0.8883092859759927, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8915300653316081, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9094235706143081, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9147203164175153, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9438513240311295, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9484164323657751, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9556139453779906, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970810059690848, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973203101195395, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9713657358661294, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748993708053604, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853825523750857, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874665295355953, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920159967441577, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925497101794463, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941020662081428, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997863208205672, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.self_attn": [ - { - "accuracy": 0.9266313700936735, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9308670368045568, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9410192049108446, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9551267127972096, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633581235539168, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641401055268943, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792031748220325, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799882248044014, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811814418062568, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819703928660601, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98182263225317, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829628311563283, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852757518528961, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864565607276745, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919198102434166, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931967847514898, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945271820470225, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964501959329937, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985482176052756, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.mlp": [ - { - "accuracy": 0.8895203708671033, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8925445326603949, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9104315033182502, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9157501826994121, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9443617421202362, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9489429004024714, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9561746730469167, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711298589827493, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735709751257673, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715709548909217, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751496647950262, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854678125702776, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875930474954657, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920879286946729, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925874763575848, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941371100721881, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978762047539931, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.self_attn": [ - { - "accuracy": 0.9303524261340499, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9336276389658451, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.944801913574338, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9572803350165486, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9650800433009863, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9659866692963988, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9798456011340022, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806442966219038, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821835157927126, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831037357216701, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823883203789592, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834551904350519, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860397492302582, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870773715083487, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923130868992303, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935679733753204, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947520371642895, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967114757600939, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986443210000289, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.mlp": [ - { - "accuracy": 0.8902710978873074, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8932245564647019, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9110278417356312, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9163360283710063, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9448009091429412, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9493386845570058, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9565197739284486, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712644530227408, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737215514760464, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9718266230775043, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753451638389379, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855961288558319, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876814330928028, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921256975794677, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926560308667831, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942007617210038, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978674712765496, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.self_attn": [ - { - "accuracy": 0.9432180181611329, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9448158754967153, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.954718274762854, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9634501973632723, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712194114690647, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9716156475478783, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826868529198691, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835778861306608, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984985409246292, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857731888769194, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857551510212943, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866965374676511, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885615865932778, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98956004017964, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937004882085603, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948024096956942, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954493863915559, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997327212113305, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988130683705094, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.mlp": [ - { - "accuracy": 0.8956121960654855, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8983332882635295, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9146136594936252, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9196281400509179, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9474087047856301, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9515665811486542, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9583402753341943, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726697179721668, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749848410720006, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731686501763761, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764565841760486, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863049986306578, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882534475182183, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925459040969145, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930162391683552, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944390143791679, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979849145311164, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.self_attn": [ - { - "accuracy": 0.9438548658508807, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9458216051571071, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9564517298713326, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9632418332621455, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715989978285506, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724696272751316, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831586577929556, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832336044637486, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848524546250701, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854651725618169, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985322100634221, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869433541898616, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894832923891954, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902639734209515, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941546261543408, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951259473746177, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954859368153848, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975702354713576, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988130810888833, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.mlp": [ - { - "accuracy": 0.8975876630283892, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9002188164740801, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9157565021887422, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9206045395694673, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9482653804589063, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9523925371468067, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9589597200974822, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731463949428871, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753993387566879, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736399594694376, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768992672907189, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865646086400375, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884730007033795, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926622509665322, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931633806845639, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944934312079567, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980164124790463, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.self_attn": [ - { - "accuracy": 0.9471925687976182, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9495594913605601, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.95912009710446, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9675813179928809, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734462997876108, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736148536903784, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839808398974128, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844772049109451, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857922223163769, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862681521335617, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98609402787406, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986982949601952, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895529889618047, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990380187286064, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943532541801687, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951645454857498, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957865636388306, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975150811369531, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989080153463874, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.mlp": [ - { - "accuracy": 0.898441422265023, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9009941006079316, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.916181921493262, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9209949686191976, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9486249841284007, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.952658045804128, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9591081333346665, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973252653493546, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755383328301832, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.97383355605416, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770271938759834, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866610068129376, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885442946106195, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927413436234929, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932094686955679, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945334898075089, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980508047665353, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.self_attn": [ - { - "accuracy": 0.9420176281128079, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9437724980525672, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9532217986416072, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9626630663406104, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711379464715719, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711531202774495, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827351241838187, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833789604017511, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847511911648326, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854682537843473, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985622491221875, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867752736317925, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886255451128818, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895181343890727, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937899258802645, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946514408802614, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954737021471374, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972224549564999, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987842498667305, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.mlp": [ - { - "accuracy": 0.9033355806022882, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9058350748382509, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9197285491973162, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9241764028556645, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9510827788617462, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9550002824980766, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9608385672327131, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744069672888145, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766210084781051, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750581844709814, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978152358555235, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987281171081122, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890934986178763, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929889664344955, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935330491280183, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947293129807804, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980858319904655, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.self_attn": [ - { - "accuracy": 0.9489718582481146, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9511142035480589, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.960512105608359, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9685926663223654, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9747358096065, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757104760501534, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846093003870919, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853698424994946, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864888287265785, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871131890686229, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872453097486869, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880550876259804, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901124030002393, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908405243768357, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945210682344623, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954143211361952, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959209249063861, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997625898453407, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998960557342798, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.mlp": [ - { - "accuracy": 0.9080480518750846, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9104181504808366, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9233700912445784, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9275161870755255, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.953611179953441, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9572221462149173, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9626461886800826, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.975807135575451, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778653332032263, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763903664425015, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792749716434628, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879890612210147, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896697130170651, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934343521890696, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938969612994697, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949881149223074, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998200120011461, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.self_attn": [ - { - "accuracy": 0.9519027802161872, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9526669431943446, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9634648773353547, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970623598783277, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758686720160767, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763791238656268, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854820494074374, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861577014671639, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870908316224813, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876285044010729, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874638332985342, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878555096802302, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906628535827622, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914874540409073, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948173740122002, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956529007176869, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959772396250628, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978513904497959, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989082470638095, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.mlp": [ - { - "accuracy": 0.9127925275824964, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9149582823738456, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9269215664826334, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9307636301964521, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9559050651732832, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.959363671252504, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9644055676180869, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770272222813219, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789737364044413, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9775669559603557, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803077358519658, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885906085255556, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901880419347435, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937611357599963, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941933080554008, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951908067450859, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982640011585318, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.self_attn": [ - { - "accuracy": 0.946176910540089, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9451239344198257, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9611281338147819, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668239669408649, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9710525156697258, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737544059753418, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834370421012864, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984974161896389, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865982020273805, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869531322619878, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865162672358565, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870246397913434, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990419871523045, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914696214254946, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948210960719734, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956891117617488, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957865691685583, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997937132284278, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988576890391414, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.mlp": [ - { - "accuracy": 0.9158639279194176, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9180213767103851, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9294441314414144, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9330894355662167, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.957464812323451, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9608181787189096, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9656330910511315, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778447088319808, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797174901468679, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783213328337297, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980991468182765, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889646537485532, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905278565129265, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939716373337433, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943708716891706, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953189147054218, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983196064422373, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.self_attn": [ - { - "accuracy": 0.9453031735029072, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.947599885519594, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9615311834495515, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9673695845995098, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726034061750397, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973762709996663, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840772187453695, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846537007833831, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860100900987163, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865770939504728, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870990435010754, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875901830964722, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990298849705141, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911486245691776, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947567474737298, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955847386736423, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957721448154189, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977319242752856, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988463749687071, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.mlp": [ - { - "accuracy": 0.9177636303938925, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9197718556970358, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9308650940656662, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9343268545344472, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9582712142728269, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9616336179897189, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.966319962637499, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783438154263422, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9801868442445993, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787261433666572, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813841371797025, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891706302878447, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907169395592064, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994102988333907, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944744110980537, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953857853543013, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983665595718776, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.self_attn": [ - { - "accuracy": 0.9455354672390968, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9469714930746704, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627764064352959, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9695670418441296, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739544703625143, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973984832293354, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836120647378266, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853858097922057, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864888042211533, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869474183651619, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865509275114164, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876153381192125, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905368478503078, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912367287324741, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948935327120125, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956507771566976, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959841922827763, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978202994825551, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988987850156263, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.mlp": [ - { - "accuracy": 0.9193624937906861, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9214048478752375, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9323414056561887, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9357543224468827, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9592122689355165, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9624072373844683, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9670258588157594, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978809519787319, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806046485900879, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792216615751386, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817683582659811, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894269742071629, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909094926551916, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942436333803926, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994609877205221, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955192905035801, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984049302074709, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.self_attn": [ - { - "accuracy": 0.9404011566657573, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9433240909129381, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9602424707263708, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9665921044070274, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715966744115576, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724076569546014, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833675464615226, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841917730518617, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851589428144507, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857188350288197, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862667275592685, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870672936085612, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902565403026529, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909417814924382, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994574425421888, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954747567535378, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955738843418658, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977578095858917, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998841377964709, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.mlp": [ - { - "accuracy": 0.9186047078110278, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9206485245376825, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9316646344959736, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9351086770184338, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9587178153451532, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620125978253782, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666859488934278, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784829050768167, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803593787364662, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978964647743851, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815595266409218, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893016122514382, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908091883407906, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941585336055141, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945550950651523, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954838165140245, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984017823735485, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.self_attn": [ - { - "accuracy": 0.9394990969449282, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.942460345569998, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9557730441447347, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.965072724269703, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9704339450690895, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9703521281480789, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822872882941738, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836249899817631, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847401663428172, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854487778502516, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850060413591564, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861198343569413, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888149123289622, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898839803645387, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939130576967727, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948843962629326, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954387155594304, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974715476273559, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987710739515023, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.mlp": [ - { - "accuracy": 0.9163005952723324, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.918469125404954, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9296248671598732, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9331888132728636, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9575300044380128, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9608753880020231, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9656291990540922, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778184004826471, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979727994883433, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783944737864658, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810124774230644, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890158255584538, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905390292406082, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993990561546525, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944085531751625, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953591679804958, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983476925772266, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.self_attn": [ - { - "accuracy": 0.9385047152172774, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9403184007387608, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9591853693127632, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.964900323189795, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9700594161404297, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712724881246686, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827493205666542, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98411433811998, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985582887136843, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860108846914954, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856000928557478, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863569383742288, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897499623475596, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907412502798252, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945465856872033, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953058749961201, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956212444521952, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977715167042334, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998782638591365, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.mlp": [ - { - "accuracy": 0.9124609748832881, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9147532572969794, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9261747137643397, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9297866364941001, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9550380471628159, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9588878394570202, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9638205419760197, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765205213334411, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785497068660334, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769384742248803, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799193402286619, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881916831363924, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899551023263484, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934662149462383, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939908874803223, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994973327673506, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998225424613338, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.30.self_attn": [ - { - "accuracy": 0.932601559907198, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9325447557494044, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9502597921527922, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9580593486316502, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9663902919273823, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9676041130442172, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803901442792267, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814518506173044, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829392761457711, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836074479389936, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982743862667121, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845992345944978, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885137914097868, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989409708941821, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993628785625333, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994681420532288, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995017617882695, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997364231661777, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998652608395787, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.30.mlp": [ - { - "accuracy": 0.9006825247779489, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9037869111634791, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9145693499594927, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9179762960411608, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9476517622824758, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9535882545169443, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9582948233000934, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9733557571889833, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755601864308119, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728824478806928, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772182946326211, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986052792344708, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885776020237245, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922641341981944, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928313403506763, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937583317805547, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978871138009708, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.31.self_attn": [ - { - "accuracy": 0.9329060865566134, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9348193083424121, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9488459783606231, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620603783987463, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9660860695876181, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9674650644883513, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793640852440149, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802794649731368, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822994578862563, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830517311347649, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983036509831436, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838694853242487, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871122187469155, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880716315237805, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930589833238628, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940008672419935, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947117104020435, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968919773527887, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998575938414433, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.31.mlp": [ - { - "accuracy": 0.8746713474392891, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8789603128097951, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8878425331786275, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8910509417764843, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9346689027734101, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9423672221601009, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9463135825935751, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9664369556121528, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691414601402357, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662359005305916, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9713924188399687, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825330476742238, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855217197327875, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901641281321645, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909153576008976, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917470523505472, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970585444098106, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.norm.norm": null, - "lm_head.linear": null - }, - "last_module_idx": 66 -} \ No newline at end of file