Upload folder using huggingface_hub
README.md CHANGED
```diff
@@ -112,7 +112,7 @@ We welcome MLLM benchmark developers to assess our InternVL1.5 and InternVL2 series
 
 We provide an example code to run InternVL2-40B using `transformers`.
 
-We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/).
+We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/).
 
 > Please use transformers==4.37.2 to ensure the model works normally.
 
```
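The section touched above references the README's example for running InternVL2-40B with `transformers` (pinned to `transformers==4.37.2`). Below is a minimal loading sketch, assuming the checkpoint id `OpenGVLab/InternVL2-40B` and the `split_model` helper whose head appears in the hunks that follow; neither is shown in full by this diff.

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Assumed checkpoint id; the diff names the model but not the repo path.
path = 'OpenGVLab/InternVL2-40B'

# split_model builds a per-layer device map; its head appears in the hunks below.
device_map = split_model('InternVL2-40B')

model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,   # half precision so the 40B model fits across GPUs
    low_cpu_mem_usage=True,
    trust_remote_code=True,       # InternVL2 ships custom modeling code
    device_map=device_map).eval()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```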
```diff
@@ -162,7 +162,7 @@ def split_model(model_name):
     device_map = {}
     world_size = torch.cuda.device_count()
     num_layers = {
-        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
+        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
         'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
     # Since the first GPU will be used for ViT, treat it as half a GPU.
     num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
```
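Only the head of `split_model` is visible in this hunk. As a sketch of how such a device map can be completed, assuming a round-robin layer assignment and module names (`language_model.model.layers.*`, `vision_model`, `mlp1`) that are not part of this diff:

```python
import math
import torch

def split_model(model_name):
    device_map = {}
    world_size = torch.cuda.device_count()
    num_layers = {
        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
        'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
    # Since the first GPU will be used for ViT, treat it as half a GPU.
    num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
    # Assumed continuation: GPU 0 keeps roughly half a share of decoder layers,
    # and each remaining GPU takes a full share.
    shares = [num_layers_per_gpu] * world_size
    shares[0] = math.ceil(num_layers_per_gpu * 0.5)
    layer_cnt = 0
    for gpu_id, share in enumerate(shares):
        for _ in range(share):
            if layer_cnt >= num_layers:
                break
            # Hypothetical module path; the README defines the exact names.
            device_map[f'language_model.model.layers.{layer_cnt}'] = gpu_id
            layer_cnt += 1
    # Keep the vision tower and the projector together on GPU 0.
    device_map['vision_model'] = 0
    device_map['mlp1'] = 0
    return device_map
```

The `world_size - 0.5` denominator implements the comment in the hunk: with 8 GPUs and InternVL2-40B (60 decoder layers), `math.ceil(60 / 7.5)` gives 8 layers per full GPU, and GPU 0 keeps `math.ceil(8 * 0.5) = 4` of them next to the ViT, so all 60 layers are covered (4 + 7 × 8 = 60).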
```diff
@@ -284,7 +284,7 @@ def split_model(model_name):
     device_map = {}
     world_size = torch.cuda.device_count()
     num_layers = {
-        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
+        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
         'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name]
     # Since the first GPU will be used for ViT, treat it as half a GPU.
     num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
```
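Since the same allocation code appears at both README locations (lines 162 and 284), a quick sanity check of the sketch above can confirm the resulting map is complete; the key names are the hypothetical ones introduced in that sketch.

```python
# Hypothetical sanity check for the sketch above (requires visible CUDA GPUs).
device_map = split_model('InternVL2-40B')
decoder_keys = [k for k in device_map if k.startswith('language_model.model.layers.')]
assert len(decoder_keys) == 60, 'every decoder layer of InternVL2-40B should be placed'
assert device_map['vision_model'] == 0, 'the ViT is expected to sit on GPU 0'
```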