Update config.json
Browse files
- config.json +6 -6
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "./checkpoints/
|
| 3 |
"architectures": [
|
| 4 |
"LlavaLlamaModel"
|
| 5 |
],
|
|
@@ -11,7 +11,7 @@
|
|
| 11 |
"image_aspect_ratio": "resize",
|
| 12 |
"interpolate_mode": "linear",
|
| 13 |
"llm_cfg": {
|
| 14 |
-
"_name_or_path": "./checkpoints/
|
| 15 |
"add_cross_attention": false,
|
| 16 |
"architectures": [
|
| 17 |
"LlamaForCausalLM"
|
|
@@ -96,7 +96,7 @@
|
|
| 96 |
},
|
| 97 |
"mm_hidden_size": 1152,
|
| 98 |
"mm_projector_cfg": {
|
| 99 |
-
"_name_or_path": "./checkpoints/
|
| 100 |
"add_cross_attention": false,
|
| 101 |
"architectures": [
|
| 102 |
"MultimodalProjector"
|
|
@@ -170,7 +170,7 @@
|
|
| 170 |
"model_type": "llava_llama",
|
| 171 |
"num_video_frames": 8,
|
| 172 |
"region_extractor_cfg": {
|
| 173 |
-
"_name_or_path": "./checkpoints/
|
| 174 |
"add_cross_attention": false,
|
| 175 |
"architectures": [
|
| 176 |
"RegionExtractor"
|
|
@@ -235,7 +235,7 @@
|
|
| 235 |
"typical_p": 1.0,
|
| 236 |
"use_bfloat16": false
|
| 237 |
},
|
| 238 |
-
"resume_path": "./checkpoints/
|
| 239 |
"s2": false,
|
| 240 |
"s2_max_split_size": 336,
|
| 241 |
"s2_scales": "336,672,1008",
|
|
@@ -246,7 +246,7 @@
|
|
| 246 |
"tune_vision_tower": true,
|
| 247 |
"vision_resolution": -1,
|
| 248 |
"vision_tower_cfg": {
|
| 249 |
-
"_name_or_path": "./checkpoints/
|
| 250 |
"add_cross_attention": false,
|
| 251 |
"architectures": [
|
| 252 |
"SiglipVisionModel"
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
|
| 3 |
"architectures": [
|
| 4 |
"LlavaLlamaModel"
|
| 5 |
],
|
|
|
|
| 11 |
"image_aspect_ratio": "resize",
|
| 12 |
"interpolate_mode": "linear",
|
| 13 |
"llm_cfg": {
|
| 14 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/llm",
|
| 15 |
"add_cross_attention": false,
|
| 16 |
"architectures": [
|
| 17 |
"LlamaForCausalLM"
|
|
|
|
| 96 |
},
|
| 97 |
"mm_hidden_size": 1152,
|
| 98 |
"mm_projector_cfg": {
|
| 99 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/mm_projector",
|
| 100 |
"add_cross_attention": false,
|
| 101 |
"architectures": [
|
| 102 |
"MultimodalProjector"
|
|
|
|
| 170 |
"model_type": "llava_llama",
|
| 171 |
"num_video_frames": 8,
|
| 172 |
"region_extractor_cfg": {
|
| 173 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/region_extractor",
|
| 174 |
"add_cross_attention": false,
|
| 175 |
"architectures": [
|
| 176 |
"RegionExtractor"
|
|
|
|
| 235 |
"typical_p": 1.0,
|
| 236 |
"use_bfloat16": false
|
| 237 |
},
|
| 238 |
+
"resume_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
|
| 239 |
"s2": false,
|
| 240 |
"s2_max_split_size": 336,
|
| 241 |
"s2_scales": "336,672,1008",
|
|
|
|
| 246 |
"tune_vision_tower": true,
|
| 247 |
"vision_resolution": -1,
|
| 248 |
"vision_tower_cfg": {
|
| 249 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/vision_tower",
|
| 250 |
"add_cross_attention": false,
|
| 251 |
"architectures": [
|
| 252 |
"SiglipVisionModel"
|