fixed flash att
app.py CHANGED
@@ -16,7 +16,6 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
-    attn_implementation="flash_attention_2"
 )
 
 moondream.eval()
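The commit drops the attn_implementation="flash_attention_2" argument, so the model loads with the default attention backend. If flash attention should still be used when available, the argument could instead be passed conditionally. A minimal sketch, not part of this commit, assuming model_id and revision are defined earlier in app.py as the diff context shows:

import importlib.util

import torch
from transformers import AutoModelForCausalLM

# Enable flash attention only when the flash-attn package is installed;
# otherwise fall back to the default attention implementation.
attn_kwargs = {}
if importlib.util.find_spec("flash_attn") is not None:
    attn_kwargs["attn_implementation"] = "flash_attention_2"

moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
    **attn_kwargs,
)
moondream.eval()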