Spaces: Running on Zero
Update modeling_llava_qwen2.py
modeling_llava_qwen2.py  CHANGED  (+5 -5)
@@ -535,13 +535,13 @@ class SigLipVisionTower(nn.Module):
         if type(images) is list:
             image_features = []
             for image in images:
-                image_forward_out = self.vision_tower(image.to(device=self.device, dtype=self.dtype).unsqueeze(0),
+                image_forward_out = self.vision_tower(image.to(device="cuda:0", dtype=self.dtype).unsqueeze(0),
                                                       output_hidden_states=True)
                 image_feature = image_forward_out.hidden_states[-1].to(image.dtype)
                 assert image_features.shape[-2] == 729
                 image_features.append(image_feature)
         else:
-            image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype),
+            image_forward_outs = self.vision_tower(images.to(device="cuda:0", dtype=self.dtype),
                                                    output_hidden_states=True)
             image_features = image_forward_outs.hidden_states[-1].to(images.dtype)
             assert image_features.shape[-2] == 729
@@ -550,7 +550,7 @@ class SigLipVisionTower(nn.Module):
 
     @property
     def dummy_feature(self):
-        return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype)
+        return torch.zeros(1, self.hidden_size, device="cuda:0", dtype=self.dtype)
 
     @property
     def dtype(self):
@@ -682,9 +682,9 @@ class LlavaMetaForCausalLM(ABC):
             image_features = self.encode_images(concat_images)
             split_sizes = [image.shape[0] for image in images]
             image_features = torch.split(image_features, split_sizes, dim=0)
-            image_features = [x.flatten(0, 1).to(self.device) for x in image_features]
+            image_features = [x.flatten(0, 1).to("cuda:0") for x in image_features]
         else:
-            image_features = self.encode_images(images).to(self.device)
+            image_features = self.encode_images(images).to("cuda:0")
 
         # Let's just add dummy tensors if they do not exist,
         # it is a headache to deal with None all the time.