| { | |
| "measurement": { | |
| "model.layers.0": { | |
| "accuracy": 0.826019287109375, | |
| "total_bits": 142490128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.1": { | |
| "accuracy": 0.8716602325439453, | |
| "total_bits": 111884816, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.2": { | |
| "accuracy": 0.8723664283752441, | |
| "total_bits": 112409104, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.3": { | |
| "accuracy": 0.8608307838439941, | |
| "total_bits": 111884816, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.4": { | |
| "accuracy": 0.8559846878051758, | |
| "total_bits": 120273424, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.5": { | |
| "accuracy": 0.8652863502502441, | |
| "total_bits": 142490128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.6": { | |
| "accuracy": 0.9044299125671387, | |
| "total_bits": 108214800, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.7": { | |
| "accuracy": 0.9182649850845337, | |
| "total_bits": 112409104, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.8": { | |
| "accuracy": 0.9104313850402832, | |
| "total_bits": 111884816, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.9": { | |
| "accuracy": 0.9092328548431396, | |
| "total_bits": 112409104, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.10": { | |
| "accuracy": 0.9016604423522949, | |
| "total_bits": 112409104, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.11": { | |
| "accuracy": 0.9087564945220947, | |
| "total_bits": 120273424, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.12": { | |
| "accuracy": 0.9003274440765381, | |
| "total_bits": 120273424, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.13": { | |
| "accuracy": 0.9268591403961182, | |
| "total_bits": 151271952, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.25, | |
| 0.75 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.25, | |
| 0.75 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.25, | |
| 0.75 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.25, | |
| 0.75 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.25, | |
| 0.75 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.14": { | |
| "accuracy": 0.9370865821838379, | |
| "total_bits": 168835600, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.15": { | |
| "accuracy": 0.954687237739563, | |
| "total_bits": 186399248, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.16": { | |
| "accuracy": 0.9543647766113281, | |
| "total_bits": 186399248, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.17": { | |
| "accuracy": 0.9534173011779785, | |
| "total_bits": 186399248, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.75, | |
| 0.25 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1.0 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.18": { | |
| "accuracy": 0.9755624532699585, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.19": { | |
| "accuracy": 0.9757581949234009, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.20": { | |
| "accuracy": 0.9778097867965698, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.21": { | |
| "accuracy": 0.9768804311752319, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.22": { | |
| "accuracy": 0.9757040739059448, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.23": { | |
| "accuracy": 0.9632000923156738, | |
| "total_bits": 203962896, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| } | |
| } | |
| } |