| { | |
| "measurement": { | |
| "model.layers.0": { | |
| "accuracy": 0.9287314414978027, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.1": { | |
| "accuracy": 0.914562463760376, | |
| "total_bits": 472429600, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.1, | |
| 0.9 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.2": { | |
| "accuracy": 0.9650976061820984, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.3": { | |
| "accuracy": 0.9562180042266846, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.4": { | |
| "accuracy": 0.9498416185379028, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.5": { | |
| "accuracy": 0.9452450275421143, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.6": { | |
| "accuracy": 0.93950355052948, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.7": { | |
| "accuracy": 0.9338642358779907, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.8": { | |
| "accuracy": 0.9276525974273682, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.9": { | |
| "accuracy": 0.9260540008544922, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.10": { | |
| "accuracy": 0.9225144386291504, | |
| "total_bits": 424064032, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.01, | |
| 0.99 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.11": { | |
| "accuracy": 0.9225969314575195, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.12": { | |
| "accuracy": 0.9214274883270264, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.13": { | |
| "accuracy": 0.9215571880340576, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.14": { | |
| "accuracy": 0.9176080226898193, | |
| "total_bits": 435991584, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.15": { | |
| "accuracy": 0.923285961151123, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.16": { | |
| "accuracy": 0.9177236557006836, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.17": { | |
| "accuracy": 0.9207503795623779, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.18": { | |
| "accuracy": 0.9209573268890381, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.19": { | |
| "accuracy": 0.9213593006134033, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.20": { | |
| "accuracy": 0.920569658279419, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.21": { | |
| "accuracy": 0.9229907989501953, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.22": { | |
| "accuracy": 0.9201843738555908, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.23": { | |
| "accuracy": 0.9208340644836426, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.24": { | |
| "accuracy": 0.9214305877685547, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.25": { | |
| "accuracy": 0.919379472732544, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.26": { | |
| "accuracy": 0.9194035530090332, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.27": { | |
| "accuracy": 0.9180495738983154, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.28": { | |
| "accuracy": 0.9156413078308105, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.29": { | |
| "accuracy": 0.9198658466339111, | |
| "total_bits": 478983200, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.05, | |
| 0.95 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.4, | |
| 0.6 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.30": { | |
| "accuracy": 0.9203238487243652, | |
| "total_bits": 578335776, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.35, | |
| 0.65 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.35, | |
| 0.65 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.35, | |
| 0.65 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.35, | |
| 0.65 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.35, | |
| 0.65 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.5, | |
| 0.5 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.31": { | |
| "accuracy": 0.9024977684020996, | |
| "total_bits": 446084128, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.02, | |
| 0.98 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128, | |
| "2": 128 | |
| }, | |
| "bits": [ | |
| 4, | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 0.2, | |
| 0.8 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| } | |
| } | |
| } |