tomer-nv committed on
Commit
8ca4513
1 Parent(s): 3209eec

Fixed cache over-allocation bug: set num_hidden_layers = 1 on the copied config before constructing the StaticCache

Browse files
Files changed (1) hide show
  1. variable_cache.py +1 -0
variable_cache.py CHANGED
@@ -83,6 +83,7 @@ class VariableCache(Cache_4_44_2, Cache):
83
  if attention_config.no_op or attention_config.replace_with_linear:
84
  return None
85
  config = deepcopy(self.config)
 
86
  config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
87
  return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
88
 
 
83
  if attention_config.no_op or attention_config.replace_with_linear:
84
  return None
85
  config = deepcopy(self.config)
86
+ config.num_hidden_layers = 1
87
  config.num_key_value_heads = self.config.num_attention_heads // attention_config.n_heads_in_group
88
  return StaticCache(config, self.max_batch_size, self.max_cache_len, device, self.dtype)
89