winglian committed on
Commit
2675fb7
1 Parent(s): 1076bcb

update readme for SDP

Browse files
Files changed (1) hide show
  1. README.md +3 -0
README.md CHANGED
@@ -300,6 +300,9 @@ weight_decay:
300
  xformers_attention:
301
  # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
302
  flash_attention: # require a100 for llama
 
 
 
303
 
304
  # resume from a specific checkpoint dir
305
  resume_from_checkpoint:
 
300
  xformers_attention:
301
  # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
302
  flash_attention: # require a100 for llama
303
+ # whether to use scaled-dot-product attention
304
+ # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
305
+ sdp_attention:
306
 
307
  # resume from a specific checkpoint dir
308
  resume_from_checkpoint: