wangzihan99 committed on
Commit
4690395
2 Parent(s): 1b59f63 f2191b9

Merge branch 'dev_triton' of https://huggingface.co/Qwen/Qwen-7B-Chat-Int4 into pr/8

Browse files
Files changed (1) hide show
  1. modeling_qwen.py +1 -1
modeling_qwen.py CHANGED
@@ -544,7 +544,7 @@ class QWenAttention(nn.Module):
544
  -1, -1, causal_mask.size(2), -1
545
  )
546
  if causal_mask is not None:
547
- attention_mask.masked_fill_(~causal_mask, torch.finfo(query.dtype).min)
548
  else:
549
  attention_mask = causal_mask
550
  attn_output = F.scaled_dot_product_attention(
 
544
  -1, -1, causal_mask.size(2), -1
545
  )
546
  if causal_mask is not None:
547
+ attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
548
  else:
549
  attention_mask = causal_mask
550
  attn_output = F.scaled_dot_product_attention(