AdrienB134 committed
Commit 697a63b
1 Parent(s): 21bb0fb
Files changed (1)
1. app.py +2 -2
app.py CHANGED
@@ -20,7 +20,7 @@ import time
 from PIL import Image
 import torch
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
 
@@ -47,7 +47,7 @@ def model_inference(
 #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2-VL-2B-Instruct",
-    attn_implementation="flash_attention_2",
+    #attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
     trust_remote_code=True,
     torch_dtype="auto").cuda().eval()
 
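Note (not part of the commit): the change disables flash-attn outright. A minimal sketch of selecting the attention backend at runtime instead, so flash_attention_2 is used only when the flash-attn package is actually importable and the app still runs on ZeroGPU, where it reportedly fails. The importlib probe and the "sdpa" fallback are assumptions, not the author's method.

import importlib.util

from transformers import Qwen2VLForConditionalGeneration

# Use flash-attn only if it is installed; otherwise fall back to PyTorch's
# built-in scaled-dot-product attention ("sdpa"). Hypothetical fallback choice.
attn_impl = "flash_attention_2" if importlib.util.find_spec("flash_attn") else "sdpa"

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    attn_implementation=attn_impl,
    trust_remote_code=True,
    torch_dtype="auto",
).cuda().eval()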
53