Commit
•
20cc7f1
1
Parent(s):
3209eec
Fixed cache over-allocation bug (#17)
Browse files
- fixed cache over-alloc bug (8ca4513e59cb96cc89023824b75a18dda653644d)
Co-authored-by: Tomer Ronen <tomer-nv@users.noreply.huggingface.co>
- variable_cache.py +1 -0
variable_cache.py
CHANGED
@@ -83,6 +83,7 @@ class VariableCache(Cache_4_44_2, Cache):
|
|
83 |
if attention_config.no_op or attention_config.replace_with_linear:
|
84 |
return None
|
85 |
config = deepcopy(self.config)
|
|
|
86 |
config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
|
87 |
return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
|
88 |
|
|
|
83 |
if attention_config.no_op or attention_config.replace_with_linear:
|
84 |
return None
|
85 |
config = deepcopy(self.config)
|
86 |
+
config.num_hidden_layers = 1
|
87 |
config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
|
88 |
return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
|
89 |
|