Fix typo in modeling_grok
#3
by
keyfan
- opened
- modeling_grok.py +1 -1
modeling_grok.py
CHANGED
@@ -273,7 +273,7 @@ class GrokAttention(nn.Module):
|
|
273 |
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
274 |
|
275 |
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) * self.attn_output_multiplier
|
276 |
-
|
277 |
|
278 |
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
|
279 |
raise ValueError(
|
|
|
273 |
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
274 |
|
275 |
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) * self.attn_output_multiplier
|
276 |
+	attn_weights = 30 * torch.tanh(attn_weights / 30)
|
277 |
|
278 |
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
|
279 |
raise ValueError(
|