Upload folder using huggingface_hub
Browse files
README.md
CHANGED
@@ -112,7 +112,7 @@ We welcome MLLM benchmark developers to assess our InternVL1.5 and InternVL2 ser
|
|
112 |
|
113 |
We provide example code to run InternVL2-40B using `transformers`.
|
114 |
|
115 |
-
We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/).
|
116 |
|
117 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
118 |
|
@@ -162,7 +162,7 @@ def split_model(model_name):
|
|
162 |
device_map = {}
|
163 |
world_size = torch.cuda.device_count()
|
164 |
num_layers = {
|
165 |
-
'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
|
166 |
'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
|
167 |
# Since the first GPU will be used for ViT, treat it as half a GPU.
|
168 |
num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
|
@@ -284,7 +284,7 @@ def split_model(model_name):
|
|
284 |
device_map = {}
|
285 |
world_size = torch.cuda.device_count()
|
286 |
num_layers = {
|
287 |
-
'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
|
288 |
'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
|
289 |
# Since the first GPU will be used for ViT, treat it as half a GPU.
|
290 |
num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
|
|
|
112 |
|
113 |
We provide example code to run InternVL2-40B using `transformers`.
|
114 |
|
115 |
+
We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/).
|
116 |
|
117 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
118 |
|
|
|
162 |
device_map = {}
|
163 |
world_size = torch.cuda.device_count()
|
164 |
num_layers = {
|
165 |
+
'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
|
166 |
'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
|
167 |
# Since the first GPU will be used for ViT, treat it as half a GPU.
|
168 |
num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
|
|
|
284 |
device_map = {}
|
285 |
world_size = torch.cuda.device_count()
|
286 |
num_layers = {
|
287 |
+
'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
|
288 |
'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
|
289 |
# Since the first GPU will be used for ViT, treat it as half a GPU.
|
290 |
num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
|