OpenGVLab/InternVL2-Llama3-76B-AWQ

Image-Text-to-Text

feature-extraction

Model card | Files and versions | Community

czczup committed on Aug 22

Commit

451d761

•

1 Parent(s): 732e1a9

Upload folder using huggingface_hub

Files changed (2) hide show

generation_config.json +6 -1
modeling_intern_vit.py +2 -2

generation_config.json CHANGED Viewed

@@ -1,4 +1,9 @@
 {
   "_from_model_config": true,
-  "transformers_version": "4.37.2"
 }

 {
   "_from_model_config": true,
+  "transformers_version": "4.37.2",
+  "eos_token_id": [
+    128001,
+    128002,
+    128003
+  ]
 }

modeling_intern_vit.py CHANGED Viewed

@@ -287,9 +287,9 @@ class InternVisionEncoderLayer(nn.Module):
         Args:
             hidden_states (`Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]`): input to the layer of shape `(batch, seq_len, embed_dim)`
         """
-        hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1)
-        hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2)
         return hidden_states

         Args:
             hidden_states (`Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]`): input to the layer of shape `(batch, seq_len, embed_dim)`
         """
+        hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states).to(hidden_states.dtype)) * self.ls1)
+        hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states).to(hidden_states.dtype)) * self.ls2)
         return hidden_states