rag_ColPali_Qwen2VL_7B

Running on Zero

Mihaiii committed on Sep 9

Commit

ac820b0

•

1 Parent(s): 5fd1a36

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def model_inference(
     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
-        "Qwen/Qwen2-VL-2B-Instruct",
         #attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
         trust_remote_code=True,
         torch_dtype=torch.bfloat16).to("cuda:0")
@@ -50,8 +50,8 @@ def model_inference(
     # default processer
     min_pixels = 256*28*28
     max_pixels = 1280*28*28
-    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
-    # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
     messages = [
         {

     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
+        "Qwen/Qwen2-VL-7B-Instruct",
         #attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
         trust_remote_code=True,
         torch_dtype=torch.bfloat16).to("cuda:0")
     # default processer
     min_pixels = 256*28*28
     max_pixels = 1280*28*28
+    #processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
     messages = [
         {