Flash attn hotfix (#951)
* use previous arg
* use eager so the legacy attention implementation can still be patched
src/axolotl/utils/models.py
CHANGED
@@ -324,6 +324,10 @@ def load_model(
             model_config._attn_implementation = (  # pylint: disable=protected-access
                 "flash_attention_2"
             )
+        else:
+            model_config._attn_implementation = (  # pylint: disable=protected-access
+                "eager"
+            )
 
     try:
         if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
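For context, a minimal sketch (not part of the diff above) of how the value written to model_config._attn_implementation is consumed downstream. The model id and the surrounding from_pretrained calls are illustrative assumptions; the real selection happens inside axolotl's load_model().

# Hedged sketch, assuming a Llama-family model; illustration only.
from transformers import AutoConfig, AutoModelForCausalLM

model_config = AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")

# "flash_attention_2" makes transformers build its FlashAttention-2 attention
# classes; "eager" keeps the legacy Python attention modules, which axolotl's
# attention monkey-patches can still replace.
model_config._attn_implementation = "eager"  # pylint: disable=protected-access

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    config=model_config,
)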