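Fixed cache over-allocation bug: set `config.num_hidden_layers = 1` on the copied config before constructing the `StaticCache`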
Browse files
- variable_cache.py +1 -0
variable_cache.py
CHANGED
@@ -83,6 +83,7 @@ class VariableCache(Cache_4_44_2, Cache):
|
|
83 |
if attention_config.no_op or attention_config.replace_with_linear:
|
84 |
return None
|
85 |
config = deepcopy(self.config)
|
|
|
86 |
config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
|
87 |
return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
|
88 |
|
|
|
83 |
if attention_config.no_op or attention_config.replace_with_linear:
|
84 |
return None
|
85 |
config = deepcopy(self.config)
|
86 |
+
config.num_hidden_layers = 1
|
87 |
config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
|
88 |
return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
|
89 |
|