AdrienB134 committed on
Commit
597a940
1 Parent(s): a4c6545
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -45,7 +45,11 @@ def model_inference(
45
  # )
46
 
47
  #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
48
- model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",attn_implementation="flash_attention_2", trust_remote_code=True, torch_dtype="auto").cuda().eval()
 
 
 
 
49
 
50
  # default processer
51
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 
45
  # )
46
 
47
  #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
48
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
49
+ "Qwen/Qwen2-VL-2B-Instruct",
50
+ #attn_implementation="flash_attention_2",
51
+ trust_remote_code=True,
52
+ torch_dtype="auto").cuda().eval()
53
 
54
  # default processer
55
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")