Upload InternVLForConditionalGeneration
- config.json +1 -0
- model-00001-of-00016.safetensors +2 -2
- model-00002-of-00016.safetensors +2 -2
- model-00003-of-00016.safetensors +2 -2
- model.safetensors.index.json +1 -46
config.json
CHANGED
@@ -37,6 +37,7 @@
     "use_sliding_window": false,
     "vocab_size": 151674
   },
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.52.0.dev0",
   "vision_config": {
     "architectures": [
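The one-line config.json change above records the checkpoint's storage precision: with `"torch_dtype": "bfloat16"` present, `from_pretrained(..., torch_dtype="auto")` can load the weights in bf16 instead of the float32 default. A minimal sketch, assuming a repo id (the one below is a placeholder, not necessarily this repository):

```python
# Sketch: how the "torch_dtype" field added in this commit is consumed.
# "OpenGVLab/InternVL3-38B-hf" is a placeholder repo id for illustration.
from transformers import AutoConfig, AutoModelForImageTextToText

config = AutoConfig.from_pretrained("OpenGVLab/InternVL3-38B-hf")
print(config.torch_dtype)  # torch.bfloat16, read from config.json

# torch_dtype="auto" tells from_pretrained to honor the dtype stored in
# config.json rather than upcasting everything to float32.
model = AutoModelForImageTextToText.from_pretrained(
    "OpenGVLab/InternVL3-38B-hf", torch_dtype="auto"
)
```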
model-00001-of-00016.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c15466cb27d9e0f4b8cbe985e743cc30a648694ed904c965565f17ec82b14045
+size 4988563328
model-00002-of-00016.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e7840a85f648d99c34a38a7a0f94e093e6cb4fdc62901be146a5518000560ab2
+size 4937273312
model-00003-of-00016.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:90a83f8196f754901bb00a50a29deb2279cfe49db8a0d601468d1cf5bc18994a
+size 4960223800
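The three shard updates above only rewrite Git LFS pointer files: each pointer stores the sha256 `oid` and byte `size` of the actual tensor file. A minimal sketch, assuming the first shard has been downloaded next to the script, of checking a local copy against the digest and size from its pointer:

```python
# Sketch: verify a downloaded shard against the oid/size in its LFS pointer.
# Expected values are taken from the model-00001-of-00016 diff above.
import hashlib
from pathlib import Path

EXPECTED_OID = "c15466cb27d9e0f4b8cbe985e743cc30a648694ed904c965565f17ec82b14045"
EXPECTED_SIZE = 4988563328

path = Path("model-00001-of-00016.safetensors")  # local copy of the shard
assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("shard matches its LFS pointer")
```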
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 76776329984
   },
   "weight_map": {
     "language_model.lm_head.weight": "model-00016-of-00016.safetensors",
@@ -784,7 +784,6 @@
     "vision_tower.embeddings.patch_embeddings.projection.bias": "model-00001-of-00016.safetensors",
     "vision_tower.embeddings.patch_embeddings.projection.weight": "model-00001-of-00016.safetensors",
     "vision_tower.embeddings.position_embeddings": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.0.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -800,7 +799,6 @@
     "vision_tower.encoder.layer.0.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.1.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -816,7 +814,6 @@
     "vision_tower.encoder.layer.1.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.10.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -832,7 +829,6 @@
     "vision_tower.encoder.layer.10.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.11.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -848,7 +844,6 @@
     "vision_tower.encoder.layer.11.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.12.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -864,7 +859,6 @@
     "vision_tower.encoder.layer.12.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.13.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -880,7 +874,6 @@
     "vision_tower.encoder.layer.13.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.14.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -896,7 +889,6 @@
     "vision_tower.encoder.layer.14.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.15.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -912,7 +904,6 @@
     "vision_tower.encoder.layer.15.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.16.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -928,7 +919,6 @@
     "vision_tower.encoder.layer.16.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.17.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -944,7 +934,6 @@
     "vision_tower.encoder.layer.17.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.18.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -960,7 +949,6 @@
     "vision_tower.encoder.layer.18.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.19.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -976,7 +964,6 @@
     "vision_tower.encoder.layer.19.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.2.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -992,7 +979,6 @@
     "vision_tower.encoder.layer.2.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.20.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1008,7 +994,6 @@
     "vision_tower.encoder.layer.20.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.21.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1024,7 +1009,6 @@
     "vision_tower.encoder.layer.21.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.22.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1040,7 +1024,6 @@
     "vision_tower.encoder.layer.22.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.23.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1056,7 +1039,6 @@
     "vision_tower.encoder.layer.23.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.24.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1072,7 +1054,6 @@
     "vision_tower.encoder.layer.24.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.25.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1088,7 +1069,6 @@
     "vision_tower.encoder.layer.25.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.26.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1104,7 +1084,6 @@
     "vision_tower.encoder.layer.26.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.27.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1120,7 +1099,6 @@
     "vision_tower.encoder.layer.27.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.28.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1136,7 +1114,6 @@
     "vision_tower.encoder.layer.28.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.29.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1152,7 +1129,6 @@
     "vision_tower.encoder.layer.29.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.3.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1168,7 +1144,6 @@
     "vision_tower.encoder.layer.3.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.30.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1184,7 +1159,6 @@
     "vision_tower.encoder.layer.30.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.31.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1200,7 +1174,6 @@
     "vision_tower.encoder.layer.31.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.32.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1216,7 +1189,6 @@
     "vision_tower.encoder.layer.32.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.33.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1232,7 +1204,6 @@
     "vision_tower.encoder.layer.33.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.34.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1248,7 +1219,6 @@
     "vision_tower.encoder.layer.34.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.35.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1264,7 +1234,6 @@
     "vision_tower.encoder.layer.35.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.36.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1280,7 +1249,6 @@
     "vision_tower.encoder.layer.36.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.37.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1296,7 +1264,6 @@
     "vision_tower.encoder.layer.37.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.38.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1312,7 +1279,6 @@
     "vision_tower.encoder.layer.38.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.39.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1328,7 +1294,6 @@
     "vision_tower.encoder.layer.39.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.4.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1344,7 +1309,6 @@
     "vision_tower.encoder.layer.4.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.40.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1360,7 +1324,6 @@
     "vision_tower.encoder.layer.40.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.40.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.40.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.41.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1376,7 +1339,6 @@
     "vision_tower.encoder.layer.41.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.42.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1392,7 +1354,6 @@
     "vision_tower.encoder.layer.42.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.43.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1408,7 +1369,6 @@
     "vision_tower.encoder.layer.43.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.44.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1424,7 +1384,6 @@
     "vision_tower.encoder.layer.44.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.5.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1440,7 +1399,6 @@
     "vision_tower.encoder.layer.5.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.6.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1456,7 +1414,6 @@
     "vision_tower.encoder.layer.6.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.7.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1472,7 +1429,6 @@
     "vision_tower.encoder.layer.7.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.8.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1488,7 +1444,6 @@
     "vision_tower.encoder.layer.8.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.9.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.projection_layer.bias": "model-00001-of-00016.safetensors",