v3: Comprehensive eval + research paper
Browse files- results/eval_v3.json +497 -0
results/eval_v3.json
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"n_layers": 24,
|
| 4 |
+
"n_kv_heads": 2,
|
| 5 |
+
"n_q_heads": 14,
|
| 6 |
+
"head_dim": 64
|
| 7 |
+
},
|
| 8 |
+
"budget": 512,
|
| 9 |
+
"generation": {
|
| 10 |
+
"FourierKV": {
|
| 11 |
+
"token_match_pct": 3.076923076923077,
|
| 12 |
+
"full_tok_s": 30.988180726951505,
|
| 13 |
+
"comp_tok_s": 33.222024966081214,
|
| 14 |
+
"speedup": 1.0720869759261076,
|
| 15 |
+
"compress_time_ms": 2198.4407330000977,
|
| 16 |
+
"cache_full_mb": 50.331648,
|
| 17 |
+
"cache_comp_mb": 7.077888,
|
| 18 |
+
"cache_ratio": 7.111111111111111,
|
| 19 |
+
"cache_saved_pct": 85.9375,
|
| 20 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 21 |
+
"comp_text_sample": " recently published in the journal \"Journal of Physics\" (J. Phys. Phys. 2019, the theory of the theory of the theory of the theory of the theory of the theory of the theory of the theory of the theory"
|
| 22 |
+
},
|
| 23 |
+
"WaveletKV": {
|
| 24 |
+
"token_match_pct": 6.153846153846154,
|
| 25 |
+
"full_tok_s": 33.14508666349456,
|
| 26 |
+
"comp_tok_s": 33.20286061523443,
|
| 27 |
+
"speedup": 1.0017430622922994,
|
| 28 |
+
"compress_time_ms": 2115.08227000013,
|
| 29 |
+
"cache_full_mb": 50.331648,
|
| 30 |
+
"cache_comp_mb": 7.077888,
|
| 31 |
+
"cache_ratio": 7.111111111111111,
|
| 32 |
+
"cache_saved_pct": 85.9375,
|
| 33 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 34 |
+
"comp_text_sample": " recently, the wavelet transform has its roots in harmonic analysis, a branch of mathematics concerned with the representation of functions in terms of basic waves. Unlike the classical Fourier transf"
|
| 35 |
+
},
|
| 36 |
+
"WaveletFourierKV": {
|
| 37 |
+
"token_match_pct": 3.076923076923077,
|
| 38 |
+
"full_tok_s": 33.16380459017609,
|
| 39 |
+
"comp_tok_s": 32.35348312660268,
|
| 40 |
+
"speedup": 0.9755660885545983,
|
| 41 |
+
"compress_time_ms": 2270.167274000414,
|
| 42 |
+
"cache_full_mb": 50.331648,
|
| 43 |
+
"cache_comp_mb": 7.077888,
|
| 44 |
+
"cache_ratio": 7.111111111111111,
|
| 45 |
+
"cache_saved_pct": 85.9375,
|
| 46 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 47 |
+
"comp_text_sample": " recently published in the journal \"Journal of Physics\" (J. Phys. Phys. 2019, the theory of the theory of the theory of the theory of the theory of the theory of the theory of the theory of the theory"
|
| 48 |
+
},
|
| 49 |
+
"WaveletTriAttn": {
|
| 50 |
+
"token_match_pct": 1.5384615384615385,
|
| 51 |
+
"full_tok_s": 33.409272713027214,
|
| 52 |
+
"comp_tok_s": 33.09489938263992,
|
| 53 |
+
"speedup": 0.9905902372051549,
|
| 54 |
+
"compress_time_ms": 2164.2266899998504,
|
| 55 |
+
"cache_full_mb": 50.331648,
|
| 56 |
+
"cache_comp_mb": 7.077888,
|
| 57 |
+
"cache_ratio": 7.111111111111111,
|
| 58 |
+
"cache_saved_pct": 85.9375,
|
| 59 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 60 |
+
"comp_text_sample": " recently. The wavelet transform is a wavelet-based transform that is particularly well suited for signal processing tasks, such as denoising, filtering, and feature extraction. Wavelet transforms are"
|
| 61 |
+
},
|
| 62 |
+
"TriAttentionKV": {
|
| 63 |
+
"token_match_pct": 3.076923076923077,
|
| 64 |
+
"full_tok_s": 32.106188953769895,
|
| 65 |
+
"comp_tok_s": 33.04113720504362,
|
| 66 |
+
"speedup": 1.0291204992156138,
|
| 67 |
+
"compress_time_ms": 2090.7214079998084,
|
| 68 |
+
"cache_full_mb": 50.331648,
|
| 69 |
+
"cache_comp_mb": 7.077888,
|
| 70 |
+
"cache_ratio": 7.111111111111111,
|
| 71 |
+
"cache_saved_pct": 85.9375,
|
| 72 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 73 |
+
"comp_text_sample": " recently, the theory of compressive sensing has been developed. Compressive sensing is a method of compressing data by compressing it in a way that minimizes the amount of data that is lost during th"
|
| 74 |
+
},
|
| 75 |
+
"TurboQuant-4bit": {
|
| 76 |
+
"token_match_pct": 1.5384615384615385,
|
| 77 |
+
"full_tok_s": 32.932947978328826,
|
| 78 |
+
"comp_tok_s": 26.310400443259233,
|
| 79 |
+
"speedup": 0.798908146934602,
|
| 80 |
+
"compress_time_ms": 2072.0481440002914,
|
| 81 |
+
"cache_full_mb": 50.331648,
|
| 82 |
+
"cache_comp_mb": 51.11808,
|
| 83 |
+
"cache_ratio": 0.9846153846153847,
|
| 84 |
+
"cache_saved_pct": -1.5625,
|
| 85 |
+
"full_text_sample": " recently, neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a\nnatural signal-processing problem. During autoregressive generation, each\nattention",
|
| 86 |
+
"comp_text_sample": " recently neural network compression.\n\nIn the context of large language models, the key-value (KV) cache presents a natural signal-processing problem. During autoregressive generation, each attention "
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
"aop": {
|
| 90 |
+
"FourierKV": {
|
| 91 |
+
"aop_mean": 0.5633386079967022,
|
| 92 |
+
"aop_max": 3.814152479171753,
|
| 93 |
+
"compress_ms": 2102.956095000991
|
| 94 |
+
},
|
| 95 |
+
"WaveletKV": {
|
| 96 |
+
"aop_mean": 0.4843935391306877,
|
| 97 |
+
"aop_max": 3.278111219406128,
|
| 98 |
+
"compress_ms": 2120.3466820006724
|
| 99 |
+
},
|
| 100 |
+
"WaveletFourierKV": {
|
| 101 |
+
"aop_mean": 0.5562485100328922,
|
| 102 |
+
"aop_max": 3.7110695838928223,
|
| 103 |
+
"compress_ms": 2254.255785001078
|
| 104 |
+
},
|
| 105 |
+
"WaveletTriAttn": {
|
| 106 |
+
"aop_mean": 0.5148299875855445,
|
| 107 |
+
"aop_max": 3.34613299369812,
|
| 108 |
+
"compress_ms": 2129.9441539995314
|
| 109 |
+
},
|
| 110 |
+
"TriAttentionKV": {
|
| 111 |
+
"aop_mean": 0.5786597217619419,
|
| 112 |
+
"aop_max": 3.6975958347320557,
|
| 113 |
+
"compress_ms": 2093.632633999732
|
| 114 |
+
},
|
| 115 |
+
"TurboQuant-4bit": {
|
| 116 |
+
"aop_mean": 0.5923798483610153,
|
| 117 |
+
"aop_max": 3.5809719562530518,
|
| 118 |
+
"compress_ms": 2075.7475410009647
|
| 119 |
+
}
|
| 120 |
+
},
|
| 121 |
+
"perplexity": {
|
| 122 |
+
"FourierKV": {
|
| 123 |
+
"ppl_full": 1.005968988220749,
|
| 124 |
+
"ppl_comp": 5.646602673671573,
|
| 125 |
+
"nd_ppl": 4.613098111033922
|
| 126 |
+
},
|
| 127 |
+
"WaveletKV": {
|
| 128 |
+
"ppl_full": 1.005968988220749,
|
| 129 |
+
"ppl_comp": 4.989580209828525,
|
| 130 |
+
"nd_ppl": 3.9599741429940334
|
| 131 |
+
},
|
| 132 |
+
"WaveletFourierKV": {
|
| 133 |
+
"ppl_full": 1.005968988220749,
|
| 134 |
+
"ppl_comp": 5.203080556978721,
|
| 135 |
+
"nd_ppl": 4.1722076686074585
|
| 136 |
+
},
|
| 137 |
+
"WaveletTriAttn": {
|
| 138 |
+
"ppl_full": 1.005968988220749,
|
| 139 |
+
"ppl_comp": 3.372750290091105,
|
| 140 |
+
"nd_ppl": 2.3527378140444357
|
| 141 |
+
},
|
| 142 |
+
"TriAttentionKV": {
|
| 143 |
+
"ppl_full": 1.005968988220749,
|
| 144 |
+
"ppl_comp": 4.4477835409904305,
|
| 145 |
+
"nd_ppl": 3.4213922684070757
|
| 146 |
+
},
|
| 147 |
+
"TurboQuant-4bit": {
|
| 148 |
+
"ppl_full": 1.005968988220749,
|
| 149 |
+
"ppl_comp": 1.085411066312208,
|
| 150 |
+
"nd_ppl": 0.07897070211106684
|
| 151 |
+
}
|
| 152 |
+
},
|
| 153 |
+
"multi_turn_drift": {
|
| 154 |
+
"FourierKV": [
|
| 155 |
+
{
|
| 156 |
+
"turn": 1,
|
| 157 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 158 |
+
"token_match_pct": 8.16326530612245,
|
| 159 |
+
"aop": 3.5391831398010254,
|
| 160 |
+
"full_cache_seq": 4171,
|
| 161 |
+
"comp_cache_seq": 569,
|
| 162 |
+
"compress_ms": 49.04297500070243,
|
| 163 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 164 |
+
"comp_sample": ": Fourier and wavelet transforms.\nContext: Fourier and wavelet transforms.\nExplanation: Fourier transforms are used to a"
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"turn": 2,
|
| 168 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 169 |
+
"token_match_pct": 10.204081632653061,
|
| 170 |
+
"aop": 3.7353036403656006,
|
| 171 |
+
"full_cache_seq": 4251,
|
| 172 |
+
"comp_cache_seq": 569,
|
| 173 |
+
"compress_ms": 44.40667199924064,
|
| 174 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 175 |
+
"comp_sample": ": Daubechies are used to analyze Daubechies are used to analyze Daubechies are used to analyze to Daubechies are used to"
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"turn": 3,
|
| 179 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 180 |
+
"token_match_pct": 2.0408163265306123,
|
| 181 |
+
"aop": 3.708634376525879,
|
| 182 |
+
"full_cache_seq": 4324,
|
| 183 |
+
"comp_cache_seq": 569,
|
| 184 |
+
"compress_ms": 44.16080900045927,
|
| 185 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 186 |
+
"comp_sample": ": Daubechies are used to analyze non-periodic signals.\nDaubechies are used to analyze periodic signals, while Daubechies"
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"turn": 4,
|
| 190 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 191 |
+
"token_match_pct": 2.0408163265306123,
|
| 192 |
+
"aop": 3.3769609928131104,
|
| 193 |
+
"full_cache_seq": 4397,
|
| 194 |
+
"comp_cache_seq": 569,
|
| 195 |
+
"compress_ms": 40.785990000586025,
|
| 196 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 197 |
+
"comp_sample": ": Daubechies are used to analyze non-periodic signals.\nDaubechies are used to analyze periodic signals, while Daubechies"
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"turn": 5,
|
| 201 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 202 |
+
"token_match_pct": 2.0408163265306123,
|
| 203 |
+
"aop": 3.9113471508026123,
|
| 204 |
+
"full_cache_seq": 4475,
|
| 205 |
+
"comp_cache_seq": 569,
|
| 206 |
+
"compress_ms": 49.11851599899819,
|
| 207 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 208 |
+
"comp_sample": ": Daubechies are used to analyze non-periodic signals.\nDaubechies are used to analyze non-periodic signals.\nDaubechies a"
|
| 209 |
+
}
|
| 210 |
+
],
|
| 211 |
+
"WaveletKV": [
|
| 212 |
+
{
|
| 213 |
+
"turn": 1,
|
| 214 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 215 |
+
"token_match_pct": 6.122448979591836,
|
| 216 |
+
"aop": 3.1009209156036377,
|
| 217 |
+
"full_cache_seq": 4171,
|
| 218 |
+
"comp_cache_seq": 569,
|
| 219 |
+
"compress_ms": 59.56875599986233,
|
| 220 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 221 |
+
"comp_sample": ": Fourier and wavelet transforms and compare their dual localization and localization in signal analysis. Fourier transf"
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"turn": 2,
|
| 225 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 226 |
+
"token_match_pct": 2.127659574468085,
|
| 227 |
+
"aop": 2.4293277263641357,
|
| 228 |
+
"full_cache_seq": 4251,
|
| 229 |
+
"comp_cache_seq": 569,
|
| 230 |
+
"compress_ms": 59.20457300089765,
|
| 231 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 232 |
+
"comp_sample": ": Fourier transforms capture periodicity, while wavelet transforms: Fourier transforms capture periodicity, while wavele"
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"turn": 3,
|
| 236 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 237 |
+
"token_match_pct": 2.0408163265306123,
|
| 238 |
+
"aop": 2.655151128768921,
|
| 239 |
+
"full_cache_seq": 4324,
|
| 240 |
+
"comp_cache_seq": 569,
|
| 241 |
+
"compress_ms": 60.057925000364776,
|
| 242 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 243 |
+
"comp_sample": ": Fourier transforms capture periodicity, while Fourier transforms capture non-periodicity, while Fourier transforms cap"
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"turn": 4,
|
| 247 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 248 |
+
"token_match_pct": 2.0408163265306123,
|
| 249 |
+
"aop": 2.8599326610565186,
|
| 250 |
+
"full_cache_seq": 4397,
|
| 251 |
+
"comp_cache_seq": 569,
|
| 252 |
+
"compress_ms": 60.129523000796326,
|
| 253 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 254 |
+
"comp_sample": ": Fourier transforms capture periodicity, while Fourier transforms capture non-periodicity, while Fourier transforms cap"
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"turn": 5,
|
| 258 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 259 |
+
"token_match_pct": 4.081632653061225,
|
| 260 |
+
"aop": 3.2390105724334717,
|
| 261 |
+
"full_cache_seq": 4475,
|
| 262 |
+
"comp_cache_seq": 569,
|
| 263 |
+
"compress_ms": 62.97118900147325,
|
| 264 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 265 |
+
"comp_sample": ": Fourier transforms capture periodicity, while Fourier transforms capture non-periodicity, while Fourier transforms cap"
|
| 266 |
+
}
|
| 267 |
+
],
|
| 268 |
+
"WaveletFourierKV": [
|
| 269 |
+
{
|
| 270 |
+
"turn": 1,
|
| 271 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 272 |
+
"token_match_pct": 8.16326530612245,
|
| 273 |
+
"aop": 4.497939586639404,
|
| 274 |
+
"full_cache_seq": 4171,
|
| 275 |
+
"comp_cache_seq": 569,
|
| 276 |
+
"compress_ms": 196.26006000180496,
|
| 277 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 278 |
+
"comp_sample": ": Fourier and wavelet transforms.\nContext: Fourier and wavelet transforms.\nExplanation: Fourier transforms are used to a"
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"turn": 2,
|
| 282 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 283 |
+
"token_match_pct": 8.16326530612245,
|
| 284 |
+
"aop": 4.285220623016357,
|
| 285 |
+
"full_cache_seq": 4251,
|
| 286 |
+
"comp_cache_seq": 569,
|
| 287 |
+
"compress_ms": 209.2363359988667,
|
| 288 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 289 |
+
"comp_sample": ": Daubech transforms are used to analyze Daubechies. Wavelets are used to analyze periodic signals, while wavelets are u"
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"turn": 3,
|
| 293 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 294 |
+
"token_match_pct": 2.0408163265306123,
|
| 295 |
+
"aop": 4.274879455566406,
|
| 296 |
+
"full_cache_seq": 4324,
|
| 297 |
+
"comp_cache_seq": 569,
|
| 298 |
+
"compress_ms": 193.79559200024232,
|
| 299 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 300 |
+
"comp_sample": ": Daubech transforms are used to analyze non-periodic signals. The Daubech transforms are used to analyze non-periodic s"
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"turn": 4,
|
| 304 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 305 |
+
"token_match_pct": 2.0408163265306123,
|
| 306 |
+
"aop": 4.316390037536621,
|
| 307 |
+
"full_cache_seq": 4397,
|
| 308 |
+
"comp_cache_seq": 569,
|
| 309 |
+
"compress_ms": 191.49548699897423,
|
| 310 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 311 |
+
"comp_sample": ": Daubech transforms are used to analyze non-periodic signals. The Daubech transforms are used to analyze periodic signa"
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"turn": 5,
|
| 315 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 316 |
+
"token_match_pct": 2.0408163265306123,
|
| 317 |
+
"aop": 4.061666488647461,
|
| 318 |
+
"full_cache_seq": 4475,
|
| 319 |
+
"comp_cache_seq": 569,
|
| 320 |
+
"compress_ms": 198.00259600015124,
|
| 321 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 322 |
+
"comp_sample": ": Daubech transforms are used to analyze non-periodic signals. The Daubech transforms are used to analyze non-periodic s"
|
| 323 |
+
}
|
| 324 |
+
],
|
| 325 |
+
"WaveletTriAttn": [
|
| 326 |
+
{
|
| 327 |
+
"turn": 1,
|
| 328 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 329 |
+
"token_match_pct": 6.122448979591836,
|
| 330 |
+
"aop": 3.596172332763672,
|
| 331 |
+
"full_cache_seq": 4171,
|
| 332 |
+
"comp_cache_seq": 569,
|
| 333 |
+
"compress_ms": 71.83850100045674,
|
| 334 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 335 |
+
"comp_sample": ": Fourier transform and wavelet transform. Answer: Fourier transform and wavelet transform. Fourier transform decomposes"
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"turn": 2,
|
| 339 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 340 |
+
"token_match_pct": 8.16326530612245,
|
| 341 |
+
"aop": 3.0550146102905273,
|
| 342 |
+
"full_cache_seq": 4251,
|
| 343 |
+
"comp_cache_seq": 569,
|
| 344 |
+
"compress_ms": 70.60234300115553,
|
| 345 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 346 |
+
"comp_sample": ": Daubech transform decomposes a signal into a sum of sinusoids, while wavelet transform decomposes a signal into a sum "
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"turn": 3,
|
| 350 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 351 |
+
"token_match_pct": 2.0408163265306123,
|
| 352 |
+
"aop": 3.115457534790039,
|
| 353 |
+
"full_cache_seq": 4324,
|
| 354 |
+
"comp_cache_seq": 569,
|
| 355 |
+
"compress_ms": 72.92050899923197,
|
| 356 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 357 |
+
"comp_sample": ": Daubech transform decomposes a signal into a sum of sinusoids and scales. Wavelet transform decomposes a signal into a"
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"turn": 4,
|
| 361 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 362 |
+
"token_match_pct": 4.081632653061225,
|
| 363 |
+
"aop": 3.2187230587005615,
|
| 364 |
+
"full_cache_seq": 4397,
|
| 365 |
+
"comp_cache_seq": 569,
|
| 366 |
+
"compress_ms": 72.12616700053331,
|
| 367 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 368 |
+
"comp_sample": ": Daubech transform decomposes a signal into a sum of sinusoids and scales. Daubech transform decomposes a signal into a"
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"turn": 5,
|
| 372 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 373 |
+
"token_match_pct": 2.0408163265306123,
|
| 374 |
+
"aop": 3.6503641605377197,
|
| 375 |
+
"full_cache_seq": 4475,
|
| 376 |
+
"comp_cache_seq": 569,
|
| 377 |
+
"compress_ms": 71.92714700067881,
|
| 378 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 379 |
+
"comp_sample": ": Daubech transform decomposes a signal into a sum of sinusoids and scales. Wavelet transform decomposes a signal into a"
|
| 380 |
+
}
|
| 381 |
+
],
|
| 382 |
+
"TriAttentionKV": [
|
| 383 |
+
{
|
| 384 |
+
"turn": 1,
|
| 385 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 386 |
+
"token_match_pct": 4.081632653061225,
|
| 387 |
+
"aop": 3.3528027534484863,
|
| 388 |
+
"full_cache_seq": 4171,
|
| 389 |
+
"comp_cache_seq": 569,
|
| 390 |
+
"compress_ms": 28.662270000495482,
|
| 391 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 392 |
+
"comp_sample": ": Fourier and wavelet transforms, their properties, and applications, and their advantages and disadvantages, and their "
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"turn": 2,
|
| 396 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 397 |
+
"token_match_pct": 10.204081632653061,
|
| 398 |
+
"aop": 3.2024409770965576,
|
| 399 |
+
"full_cache_seq": 4251,
|
| 400 |
+
"comp_cache_seq": 569,
|
| 401 |
+
"compress_ms": 29.552105001130258,
|
| 402 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 403 |
+
"comp_sample": ": Daubechies are used for signal processing and analysis, and Daubechies are more efficient computation and applications"
|
| 404 |
+
},
|
| 405 |
+
{
|
| 406 |
+
"turn": 3,
|
| 407 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 408 |
+
"token_match_pct": 2.0408163265306123,
|
| 409 |
+
"aop": 3.3311519622802734,
|
| 410 |
+
"full_cache_seq": 4324,
|
| 411 |
+
"comp_cache_seq": 569,
|
| 412 |
+
"compress_ms": 27.73076500125171,
|
| 413 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 414 |
+
"comp_sample": ": Daubechies: Daubechies: Daubechies: Daubechies: Daubechies.\nQuestion: What are Daubechies: Daubechies, Daubechies: Dau"
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"turn": 4,
|
| 418 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 419 |
+
"token_match_pct": 2.0408163265306123,
|
| 420 |
+
"aop": 3.0824573040008545,
|
| 421 |
+
"full_cache_seq": 4397,
|
| 422 |
+
"comp_cache_seq": 569,
|
| 423 |
+
"compress_ms": 27.76634300062142,
|
| 424 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 425 |
+
"comp_sample": ": Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubech"
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"turn": 5,
|
| 429 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 430 |
+
"token_match_pct": 2.0408163265306123,
|
| 431 |
+
"aop": 3.4108312129974365,
|
| 432 |
+
"full_cache_seq": 4475,
|
| 433 |
+
"comp_cache_seq": 569,
|
| 434 |
+
"compress_ms": 28.417430999979842,
|
| 435 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 436 |
+
"comp_sample": ": Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubechies: Daubech"
|
| 437 |
+
}
|
| 438 |
+
],
|
| 439 |
+
"TurboQuant-4bit": [
|
| 440 |
+
{
|
| 441 |
+
"turn": 1,
|
| 442 |
+
"question": "Summarize the key differences between Fourier and wavelet tr",
|
| 443 |
+
"token_match_pct": 6.122448979591836,
|
| 444 |
+
"aop": 3.371828317642212,
|
| 445 |
+
"full_cache_seq": 4171,
|
| 446 |
+
"comp_cache_seq": 4171,
|
| 447 |
+
"compress_ms": 33.6713709984906,
|
| 448 |
+
"full_sample": ": Fourier transforms decompose a signal into a series of sine and cosine waves, while wavelet transforms decompose a sig",
|
| 449 |
+
"comp_sample": ": Fourier transforms are a type of wavelet transform, which\nuses a wavelet basis that is a compactly supported wavelet, "
|
| 450 |
+
},
|
| 451 |
+
{
|
| 452 |
+
"turn": 2,
|
| 453 |
+
"question": "What are Daubechies wavelets and why do they have four vanis",
|
| 454 |
+
"token_match_pct": 24.489795918367346,
|
| 455 |
+
"aop": 3.1174914836883545,
|
| 456 |
+
"full_cache_seq": 4251,
|
| 457 |
+
"comp_cache_seq": 4251,
|
| 458 |
+
"compress_ms": 37.42986700126494,
|
| 459 |
+
"full_sample": ": Daubechies wavelets are compactly supported wavelet bases with four vanishing moments. They achieve optimal support le",
|
| 460 |
+
"comp_sample": ": Daubechies wavelets are compactly supported wavelets, while Daubechies wavelets are compactly supported wavelets. Daub"
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"turn": 3,
|
| 464 |
+
"question": "Explain the KV cache memory problem in large language models",
|
| 465 |
+
"token_match_pct": 2.0408163265306123,
|
| 466 |
+
"aop": 2.5435070991516113,
|
| 467 |
+
"full_cache_seq": 4324,
|
| 468 |
+
"comp_cache_seq": 4324,
|
| 469 |
+
"compress_ms": 27.6231240004563,
|
| 470 |
+
"full_sample": ": The KV cache memory problem in large language models is that each attention layer maintains a cache of key and value v",
|
| 471 |
+
"comp_sample": ": Key that captures\nQuestion: What are Daubechies wavelets and why do they have four vanishing moments?\nAnswer: Daubechi"
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"turn": 4,
|
| 475 |
+
"question": "Compare token eviction methods like SnapKV with spectral met",
|
| 476 |
+
"token_match_pct": 2.0408163265306123,
|
| 477 |
+
"aop": 3.4768593311309814,
|
| 478 |
+
"full_cache_seq": 4397,
|
| 479 |
+
"comp_cache_seq": 4397,
|
| 480 |
+
"compress_ms": 29.049380000287783,
|
| 481 |
+
"full_sample": ": Token eviction methods like SnapKV and spectral methods are more flexible and can capture high-level semantic patterns",
|
| 482 |
+
"comp_sample": ": SpectralKV is a hybrid approach that combines two complementary views of the signal. SpectralKV is a hybrid approach t"
|
| 483 |
+
},
|
| 484 |
+
{
|
| 485 |
+
"turn": 5,
|
| 486 |
+
"question": "What is the advantage of the cascaded wavelet-Fourier hybrid",
|
| 487 |
+
"token_match_pct": 6.122448979591836,
|
| 488 |
+
"aop": 2.9454894065856934,
|
| 489 |
+
"full_cache_seq": 4475,
|
| 490 |
+
"comp_cache_seq": 4475,
|
| 491 |
+
"compress_ms": 38.53806899860501,
|
| 492 |
+
"full_sample": ": The cascaded wavelet-Fourier hybrid approach first decomposes the signal via multi-level DWT, then applies FFT within ",
|
| 493 |
+
"comp_sample": ": SpectralKV is a hybrid approach that combines two complementary views of the signal. SpectralKV is a hybrid approach t"
|
| 494 |
+
}
|
| 495 |
+
]
|
| 496 |
+
}
|
| 497 |
+
}
|