rag_ColPali_Qwen2VL_7B

Running

AdrienB134 committed on Aug 31, 2024

Commit

20f229d

1 Parent(s): 597a940

gedet

Files changed (1) hide show

app.py CHANGED Viewed

@@ -47,7 +47,7 @@ def model_inference(
     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
-        #attn_implementation="flash_attention_2",
         trust_remote_code=True,
         torch_dtype="auto").cuda().eval()

     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
+        attn_implementation="flash_attention_2",
         trust_remote_code=True,
         torch_dtype="auto").cuda().eval()