Upload folder using huggingface_hub
Browse files- README.md +2 -2
- config.json +5 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +1 -1
- quantization_config.json +387 -385
README.md
CHANGED
|
@@ -25,11 +25,11 @@ tags:
|
|
| 25 |
|
| 26 |
<p><b>Context/instruct template</b>: ChatML. <s>Was definitely not tested with ChatML instruct and Mistral v7 template, nuh-uh.</s></p>
|
| 27 |
|
| 28 |
-
<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it
|
| 29 |
|
| 30 |
A virt-io derivative prompt worked best during our testing, but feel free to use what you like.
|
| 31 |
|
| 32 |
-
Master import for ST:
|
| 33 |
|
| 34 |
## Reasoning
|
| 35 |
|
|
|
|
| 25 |
|
| 26 |
<p><b>Context/instruct template</b>: ChatML. <s>Was definitely not tested with ChatML instruct and Mistral v7 template, nuh-uh.</s></p>
|
| 27 |
|
| 28 |
+
<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it. (or not, see below.)</p>
|
| 29 |
|
| 30 |
A virt-io derivative prompt worked best during our testing, but feel free to use what you like.
|
| 31 |
|
| 32 |
+
Master import for ST: https://files.catbox.moe/w812at.png
|
| 33 |
|
| 34 |
## Reasoning
|
| 35 |
|
config.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
],
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
"bos_token_id": 151643,
|
| 8 |
-
"eos_token_id":
|
| 9 |
"hidden_act": "silu",
|
| 10 |
"hidden_size": 5120,
|
| 11 |
"initializer_range": 0.02,
|
|
@@ -28,11 +28,13 @@
|
|
| 28 |
"vocab_size": 151665,
|
| 29 |
"quantization_config": {
|
| 30 |
"quant_method": "exl3",
|
| 31 |
-
"version": "0.0.
|
| 32 |
"bits": 4.5,
|
|
|
|
| 33 |
"calibration": {
|
| 34 |
"rows": 100,
|
| 35 |
"cols": 2048
|
| 36 |
-
}
|
|
|
|
| 37 |
}
|
| 38 |
}
|
|
|
|
| 5 |
],
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
"hidden_act": "silu",
|
| 10 |
"hidden_size": 5120,
|
| 11 |
"initializer_range": 0.02,
|
|
|
|
| 28 |
"vocab_size": 151665,
|
| 29 |
"quantization_config": {
|
| 30 |
"quant_method": "exl3",
|
| 31 |
+
"version": "0.0.4",
|
| 32 |
"bits": 4.5,
|
| 33 |
+
"head_bits": 6,
|
| 34 |
"calibration": {
|
| 35 |
"rows": 100,
|
| 36 |
"cols": 2048
|
| 37 |
+
},
|
| 38 |
+
"out_scales": "auto"
|
| 39 |
}
|
| 40 |
}
|
model-00001-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8413645240
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f
|
| 3 |
size 8413645240
|
model-00002-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a
|
| 3 |
+
size 8507344792
|
model-00003-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c
|
| 3 |
+
size 2782595248
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 19703404800
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
|
quantization_config.json
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"quant_method": "exl3",
|
| 3 |
-
"version": "0.0.
|
| 4 |
"bits": 4.5,
|
|
|
|
| 5 |
"calibration": {
|
| 6 |
"rows": 100,
|
| 7 |
"cols": 2048
|
| 8 |
},
|
|
|
|
| 9 |
"tensor_storage": {
|
| 10 |
"model.embed_tokens": {
|
| 11 |
"stored_tensors": {
|
|
@@ -585,14 +587,14 @@
|
|
| 585 |
"shape": [
|
| 586 |
320,
|
| 587 |
64,
|
| 588 |
-
|
| 589 |
],
|
| 590 |
-
"n_bytes":
|
| 591 |
"dtype": "torch.int16"
|
| 592 |
}
|
| 593 |
},
|
| 594 |
"quant_format": "exl3",
|
| 595 |
-
"bits_per_weight":
|
| 596 |
},
|
| 597 |
"model.layers.2.self_attn.v_proj": {
|
| 598 |
"stored_tensors": {
|
|
@@ -621,14 +623,14 @@
|
|
| 621 |
"shape": [
|
| 622 |
320,
|
| 623 |
64,
|
| 624 |
-
|
| 625 |
],
|
| 626 |
-
"n_bytes":
|
| 627 |
"dtype": "torch.int16"
|
| 628 |
}
|
| 629 |
},
|
| 630 |
"quant_format": "exl3",
|
| 631 |
-
"bits_per_weight":
|
| 632 |
},
|
| 633 |
"model.layers.2.self_attn.o_proj": {
|
| 634 |
"stored_tensors": {
|
|
@@ -1323,14 +1325,14 @@
|
|
| 1323 |
"shape": [
|
| 1324 |
320,
|
| 1325 |
64,
|
| 1326 |
-
|
| 1327 |
],
|
| 1328 |
-
"n_bytes":
|
| 1329 |
"dtype": "torch.int16"
|
| 1330 |
}
|
| 1331 |
},
|
| 1332 |
"quant_format": "exl3",
|
| 1333 |
-
"bits_per_weight":
|
| 1334 |
},
|
| 1335 |
"model.layers.5.self_attn.v_proj": {
|
| 1336 |
"stored_tensors": {
|
|
@@ -1779,14 +1781,14 @@
|
|
| 1779 |
"shape": [
|
| 1780 |
320,
|
| 1781 |
320,
|
| 1782 |
-
|
| 1783 |
],
|
| 1784 |
-
"n_bytes":
|
| 1785 |
"dtype": "torch.int16"
|
| 1786 |
}
|
| 1787 |
},
|
| 1788 |
"quant_format": "exl3",
|
| 1789 |
-
"bits_per_weight":
|
| 1790 |
},
|
| 1791 |
"model.layers.7.self_attn.k_proj": {
|
| 1792 |
"stored_tensors": {
|
|
@@ -1815,14 +1817,14 @@
|
|
| 1815 |
"shape": [
|
| 1816 |
320,
|
| 1817 |
64,
|
| 1818 |
-
|
| 1819 |
],
|
| 1820 |
-
"n_bytes":
|
| 1821 |
"dtype": "torch.int16"
|
| 1822 |
}
|
| 1823 |
},
|
| 1824 |
"quant_format": "exl3",
|
| 1825 |
-
"bits_per_weight":
|
| 1826 |
},
|
| 1827 |
"model.layers.7.self_attn.v_proj": {
|
| 1828 |
"stored_tensors": {
|
|
@@ -1851,14 +1853,14 @@
|
|
| 1851 |
"shape": [
|
| 1852 |
320,
|
| 1853 |
64,
|
| 1854 |
-
|
| 1855 |
],
|
| 1856 |
-
"n_bytes":
|
| 1857 |
"dtype": "torch.int16"
|
| 1858 |
}
|
| 1859 |
},
|
| 1860 |
"quant_format": "exl3",
|
| 1861 |
-
"bits_per_weight":
|
| 1862 |
},
|
| 1863 |
"model.layers.7.self_attn.o_proj": {
|
| 1864 |
"stored_tensors": {
|
|
@@ -1880,14 +1882,14 @@
|
|
| 1880 |
"shape": [
|
| 1881 |
320,
|
| 1882 |
320,
|
| 1883 |
-
|
| 1884 |
],
|
| 1885 |
-
"n_bytes":
|
| 1886 |
"dtype": "torch.int16"
|
| 1887 |
}
|
| 1888 |
},
|
| 1889 |
"quant_format": "exl3",
|
| 1890 |
-
"bits_per_weight":
|
| 1891 |
},
|
| 1892 |
"model.layers.7.post_attention_layernorm": {
|
| 1893 |
"stored_tensors": {
|
|
@@ -1920,14 +1922,14 @@
|
|
| 1920 |
"shape": [
|
| 1921 |
320,
|
| 1922 |
1728,
|
| 1923 |
-
|
| 1924 |
],
|
| 1925 |
-
"n_bytes":
|
| 1926 |
"dtype": "torch.int16"
|
| 1927 |
}
|
| 1928 |
},
|
| 1929 |
"quant_format": "exl3",
|
| 1930 |
-
"bits_per_weight":
|
| 1931 |
},
|
| 1932 |
"model.layers.7.mlp.gate_proj": {
|
| 1933 |
"stored_tensors": {
|
|
@@ -2025,14 +2027,14 @@
|
|
| 2025 |
"shape": [
|
| 2026 |
320,
|
| 2027 |
320,
|
| 2028 |
-
|
| 2029 |
],
|
| 2030 |
-
"n_bytes":
|
| 2031 |
"dtype": "torch.int16"
|
| 2032 |
}
|
| 2033 |
},
|
| 2034 |
"quant_format": "exl3",
|
| 2035 |
-
"bits_per_weight":
|
| 2036 |
},
|
| 2037 |
"model.layers.8.self_attn.k_proj": {
|
| 2038 |
"stored_tensors": {
|
|
@@ -2126,14 +2128,14 @@
|
|
| 2126 |
"shape": [
|
| 2127 |
320,
|
| 2128 |
320,
|
| 2129 |
-
|
| 2130 |
],
|
| 2131 |
-
"n_bytes":
|
| 2132 |
"dtype": "torch.int16"
|
| 2133 |
}
|
| 2134 |
},
|
| 2135 |
"quant_format": "exl3",
|
| 2136 |
-
"bits_per_weight":
|
| 2137 |
},
|
| 2138 |
"model.layers.8.post_attention_layernorm": {
|
| 2139 |
"stored_tensors": {
|
|
@@ -2166,14 +2168,14 @@
|
|
| 2166 |
"shape": [
|
| 2167 |
320,
|
| 2168 |
1728,
|
| 2169 |
-
|
| 2170 |
],
|
| 2171 |
-
"n_bytes":
|
| 2172 |
"dtype": "torch.int16"
|
| 2173 |
}
|
| 2174 |
},
|
| 2175 |
"quant_format": "exl3",
|
| 2176 |
-
"bits_per_weight":
|
| 2177 |
},
|
| 2178 |
"model.layers.8.mlp.gate_proj": {
|
| 2179 |
"stored_tensors": {
|
|
@@ -2517,14 +2519,14 @@
|
|
| 2517 |
"shape": [
|
| 2518 |
320,
|
| 2519 |
320,
|
| 2520 |
-
|
| 2521 |
],
|
| 2522 |
-
"n_bytes":
|
| 2523 |
"dtype": "torch.int16"
|
| 2524 |
}
|
| 2525 |
},
|
| 2526 |
"quant_format": "exl3",
|
| 2527 |
-
"bits_per_weight":
|
| 2528 |
},
|
| 2529 |
"model.layers.10.self_attn.k_proj": {
|
| 2530 |
"stored_tensors": {
|
|
@@ -2553,14 +2555,14 @@
|
|
| 2553 |
"shape": [
|
| 2554 |
320,
|
| 2555 |
64,
|
| 2556 |
-
|
| 2557 |
],
|
| 2558 |
-
"n_bytes":
|
| 2559 |
"dtype": "torch.int16"
|
| 2560 |
}
|
| 2561 |
},
|
| 2562 |
"quant_format": "exl3",
|
| 2563 |
-
"bits_per_weight":
|
| 2564 |
},
|
| 2565 |
"model.layers.10.self_attn.v_proj": {
|
| 2566 |
"stored_tensors": {
|
|
@@ -2618,14 +2620,14 @@
|
|
| 2618 |
"shape": [
|
| 2619 |
320,
|
| 2620 |
320,
|
| 2621 |
-
|
| 2622 |
],
|
| 2623 |
-
"n_bytes":
|
| 2624 |
"dtype": "torch.int16"
|
| 2625 |
}
|
| 2626 |
},
|
| 2627 |
"quant_format": "exl3",
|
| 2628 |
-
"bits_per_weight":
|
| 2629 |
},
|
| 2630 |
"model.layers.10.post_attention_layernorm": {
|
| 2631 |
"stored_tensors": {
|
|
@@ -2658,14 +2660,14 @@
|
|
| 2658 |
"shape": [
|
| 2659 |
320,
|
| 2660 |
1728,
|
| 2661 |
-
|
| 2662 |
],
|
| 2663 |
-
"n_bytes":
|
| 2664 |
"dtype": "torch.int16"
|
| 2665 |
}
|
| 2666 |
},
|
| 2667 |
"quant_format": "exl3",
|
| 2668 |
-
"bits_per_weight":
|
| 2669 |
},
|
| 2670 |
"model.layers.10.mlp.gate_proj": {
|
| 2671 |
"stored_tensors": {
|
|
@@ -2763,14 +2765,14 @@
|
|
| 2763 |
"shape": [
|
| 2764 |
320,
|
| 2765 |
320,
|
| 2766 |
-
|
| 2767 |
],
|
| 2768 |
-
"n_bytes":
|
| 2769 |
"dtype": "torch.int16"
|
| 2770 |
}
|
| 2771 |
},
|
| 2772 |
"quant_format": "exl3",
|
| 2773 |
-
"bits_per_weight":
|
| 2774 |
},
|
| 2775 |
"model.layers.11.self_attn.k_proj": {
|
| 2776 |
"stored_tensors": {
|
|
@@ -2864,14 +2866,14 @@
|
|
| 2864 |
"shape": [
|
| 2865 |
320,
|
| 2866 |
320,
|
| 2867 |
-
|
| 2868 |
],
|
| 2869 |
-
"n_bytes":
|
| 2870 |
"dtype": "torch.int16"
|
| 2871 |
}
|
| 2872 |
},
|
| 2873 |
"quant_format": "exl3",
|
| 2874 |
-
"bits_per_weight":
|
| 2875 |
},
|
| 2876 |
"model.layers.11.post_attention_layernorm": {
|
| 2877 |
"stored_tensors": {
|
|
@@ -2904,14 +2906,14 @@
|
|
| 2904 |
"shape": [
|
| 2905 |
320,
|
| 2906 |
1728,
|
| 2907 |
-
|
| 2908 |
],
|
| 2909 |
-
"n_bytes":
|
| 2910 |
"dtype": "torch.int16"
|
| 2911 |
}
|
| 2912 |
},
|
| 2913 |
"quant_format": "exl3",
|
| 2914 |
-
"bits_per_weight":
|
| 2915 |
},
|
| 2916 |
"model.layers.11.mlp.gate_proj": {
|
| 2917 |
"stored_tensors": {
|
|
@@ -3255,14 +3257,14 @@
|
|
| 3255 |
"shape": [
|
| 3256 |
320,
|
| 3257 |
320,
|
| 3258 |
-
|
| 3259 |
],
|
| 3260 |
-
"n_bytes":
|
| 3261 |
"dtype": "torch.int16"
|
| 3262 |
}
|
| 3263 |
},
|
| 3264 |
"quant_format": "exl3",
|
| 3265 |
-
"bits_per_weight":
|
| 3266 |
},
|
| 3267 |
"model.layers.13.self_attn.k_proj": {
|
| 3268 |
"stored_tensors": {
|
|
@@ -3291,14 +3293,14 @@
|
|
| 3291 |
"shape": [
|
| 3292 |
320,
|
| 3293 |
64,
|
| 3294 |
-
|
| 3295 |
],
|
| 3296 |
-
"n_bytes":
|
| 3297 |
"dtype": "torch.int16"
|
| 3298 |
}
|
| 3299 |
},
|
| 3300 |
"quant_format": "exl3",
|
| 3301 |
-
"bits_per_weight":
|
| 3302 |
},
|
| 3303 |
"model.layers.13.self_attn.v_proj": {
|
| 3304 |
"stored_tensors": {
|
|
@@ -3356,14 +3358,14 @@
|
|
| 3356 |
"shape": [
|
| 3357 |
320,
|
| 3358 |
320,
|
| 3359 |
-
|
| 3360 |
],
|
| 3361 |
-
"n_bytes":
|
| 3362 |
"dtype": "torch.int16"
|
| 3363 |
}
|
| 3364 |
},
|
| 3365 |
"quant_format": "exl3",
|
| 3366 |
-
"bits_per_weight":
|
| 3367 |
},
|
| 3368 |
"model.layers.13.post_attention_layernorm": {
|
| 3369 |
"stored_tensors": {
|
|
@@ -3396,14 +3398,14 @@
|
|
| 3396 |
"shape": [
|
| 3397 |
320,
|
| 3398 |
1728,
|
| 3399 |
-
|
| 3400 |
],
|
| 3401 |
-
"n_bytes":
|
| 3402 |
"dtype": "torch.int16"
|
| 3403 |
}
|
| 3404 |
},
|
| 3405 |
"quant_format": "exl3",
|
| 3406 |
-
"bits_per_weight":
|
| 3407 |
},
|
| 3408 |
"model.layers.13.mlp.gate_proj": {
|
| 3409 |
"stored_tensors": {
|
|
@@ -3501,14 +3503,14 @@
|
|
| 3501 |
"shape": [
|
| 3502 |
320,
|
| 3503 |
320,
|
| 3504 |
-
|
| 3505 |
],
|
| 3506 |
-
"n_bytes":
|
| 3507 |
"dtype": "torch.int16"
|
| 3508 |
}
|
| 3509 |
},
|
| 3510 |
"quant_format": "exl3",
|
| 3511 |
-
"bits_per_weight":
|
| 3512 |
},
|
| 3513 |
"model.layers.14.self_attn.k_proj": {
|
| 3514 |
"stored_tensors": {
|
|
@@ -3537,14 +3539,14 @@
|
|
| 3537 |
"shape": [
|
| 3538 |
320,
|
| 3539 |
64,
|
| 3540 |
-
|
| 3541 |
],
|
| 3542 |
-
"n_bytes":
|
| 3543 |
"dtype": "torch.int16"
|
| 3544 |
}
|
| 3545 |
},
|
| 3546 |
"quant_format": "exl3",
|
| 3547 |
-
"bits_per_weight":
|
| 3548 |
},
|
| 3549 |
"model.layers.14.self_attn.v_proj": {
|
| 3550 |
"stored_tensors": {
|
|
@@ -3573,14 +3575,14 @@
|
|
| 3573 |
"shape": [
|
| 3574 |
320,
|
| 3575 |
64,
|
| 3576 |
-
|
| 3577 |
],
|
| 3578 |
-
"n_bytes":
|
| 3579 |
"dtype": "torch.int16"
|
| 3580 |
}
|
| 3581 |
},
|
| 3582 |
"quant_format": "exl3",
|
| 3583 |
-
"bits_per_weight":
|
| 3584 |
},
|
| 3585 |
"model.layers.14.self_attn.o_proj": {
|
| 3586 |
"stored_tensors": {
|
|
@@ -3602,14 +3604,14 @@
|
|
| 3602 |
"shape": [
|
| 3603 |
320,
|
| 3604 |
320,
|
| 3605 |
-
|
| 3606 |
],
|
| 3607 |
-
"n_bytes":
|
| 3608 |
"dtype": "torch.int16"
|
| 3609 |
}
|
| 3610 |
},
|
| 3611 |
"quant_format": "exl3",
|
| 3612 |
-
"bits_per_weight":
|
| 3613 |
},
|
| 3614 |
"model.layers.14.post_attention_layernorm": {
|
| 3615 |
"stored_tensors": {
|
|
@@ -3642,14 +3644,14 @@
|
|
| 3642 |
"shape": [
|
| 3643 |
320,
|
| 3644 |
1728,
|
| 3645 |
-
|
| 3646 |
],
|
| 3647 |
-
"n_bytes":
|
| 3648 |
"dtype": "torch.int16"
|
| 3649 |
}
|
| 3650 |
},
|
| 3651 |
"quant_format": "exl3",
|
| 3652 |
-
"bits_per_weight":
|
| 3653 |
},
|
| 3654 |
"model.layers.14.mlp.gate_proj": {
|
| 3655 |
"stored_tensors": {
|
|
@@ -3747,14 +3749,14 @@
|
|
| 3747 |
"shape": [
|
| 3748 |
320,
|
| 3749 |
320,
|
| 3750 |
-
|
| 3751 |
],
|
| 3752 |
-
"n_bytes":
|
| 3753 |
"dtype": "torch.int16"
|
| 3754 |
}
|
| 3755 |
},
|
| 3756 |
"quant_format": "exl3",
|
| 3757 |
-
"bits_per_weight":
|
| 3758 |
},
|
| 3759 |
"model.layers.15.self_attn.k_proj": {
|
| 3760 |
"stored_tensors": {
|
|
@@ -3783,14 +3785,14 @@
|
|
| 3783 |
"shape": [
|
| 3784 |
320,
|
| 3785 |
64,
|
| 3786 |
-
|
| 3787 |
],
|
| 3788 |
-
"n_bytes":
|
| 3789 |
"dtype": "torch.int16"
|
| 3790 |
}
|
| 3791 |
},
|
| 3792 |
"quant_format": "exl3",
|
| 3793 |
-
"bits_per_weight":
|
| 3794 |
},
|
| 3795 |
"model.layers.15.self_attn.v_proj": {
|
| 3796 |
"stored_tensors": {
|
|
@@ -3819,14 +3821,14 @@
|
|
| 3819 |
"shape": [
|
| 3820 |
320,
|
| 3821 |
64,
|
| 3822 |
-
|
| 3823 |
],
|
| 3824 |
-
"n_bytes":
|
| 3825 |
"dtype": "torch.int16"
|
| 3826 |
}
|
| 3827 |
},
|
| 3828 |
"quant_format": "exl3",
|
| 3829 |
-
"bits_per_weight":
|
| 3830 |
},
|
| 3831 |
"model.layers.15.self_attn.o_proj": {
|
| 3832 |
"stored_tensors": {
|
|
@@ -3848,14 +3850,14 @@
|
|
| 3848 |
"shape": [
|
| 3849 |
320,
|
| 3850 |
320,
|
| 3851 |
-
|
| 3852 |
],
|
| 3853 |
-
"n_bytes":
|
| 3854 |
"dtype": "torch.int16"
|
| 3855 |
}
|
| 3856 |
},
|
| 3857 |
"quant_format": "exl3",
|
| 3858 |
-
"bits_per_weight":
|
| 3859 |
},
|
| 3860 |
"model.layers.15.post_attention_layernorm": {
|
| 3861 |
"stored_tensors": {
|
|
@@ -3888,14 +3890,14 @@
|
|
| 3888 |
"shape": [
|
| 3889 |
320,
|
| 3890 |
1728,
|
| 3891 |
-
|
| 3892 |
],
|
| 3893 |
-
"n_bytes":
|
| 3894 |
"dtype": "torch.int16"
|
| 3895 |
}
|
| 3896 |
},
|
| 3897 |
"quant_format": "exl3",
|
| 3898 |
-
"bits_per_weight":
|
| 3899 |
},
|
| 3900 |
"model.layers.15.mlp.gate_proj": {
|
| 3901 |
"stored_tensors": {
|
|
@@ -4239,14 +4241,14 @@
|
|
| 4239 |
"shape": [
|
| 4240 |
320,
|
| 4241 |
320,
|
| 4242 |
-
|
| 4243 |
],
|
| 4244 |
-
"n_bytes":
|
| 4245 |
"dtype": "torch.int16"
|
| 4246 |
}
|
| 4247 |
},
|
| 4248 |
"quant_format": "exl3",
|
| 4249 |
-
"bits_per_weight":
|
| 4250 |
},
|
| 4251 |
"model.layers.17.self_attn.k_proj": {
|
| 4252 |
"stored_tensors": {
|
|
@@ -4340,14 +4342,14 @@
|
|
| 4340 |
"shape": [
|
| 4341 |
320,
|
| 4342 |
320,
|
| 4343 |
-
|
| 4344 |
],
|
| 4345 |
-
"n_bytes":
|
| 4346 |
"dtype": "torch.int16"
|
| 4347 |
}
|
| 4348 |
},
|
| 4349 |
"quant_format": "exl3",
|
| 4350 |
-
"bits_per_weight":
|
| 4351 |
},
|
| 4352 |
"model.layers.17.post_attention_layernorm": {
|
| 4353 |
"stored_tensors": {
|
|
@@ -4380,14 +4382,14 @@
|
|
| 4380 |
"shape": [
|
| 4381 |
320,
|
| 4382 |
1728,
|
| 4383 |
-
|
| 4384 |
],
|
| 4385 |
-
"n_bytes":
|
| 4386 |
"dtype": "torch.int16"
|
| 4387 |
}
|
| 4388 |
},
|
| 4389 |
"quant_format": "exl3",
|
| 4390 |
-
"bits_per_weight":
|
| 4391 |
},
|
| 4392 |
"model.layers.17.mlp.gate_proj": {
|
| 4393 |
"stored_tensors": {
|
|
@@ -4485,14 +4487,14 @@
|
|
| 4485 |
"shape": [
|
| 4486 |
320,
|
| 4487 |
320,
|
| 4488 |
-
|
| 4489 |
],
|
| 4490 |
-
"n_bytes":
|
| 4491 |
"dtype": "torch.int16"
|
| 4492 |
}
|
| 4493 |
},
|
| 4494 |
"quant_format": "exl3",
|
| 4495 |
-
"bits_per_weight":
|
| 4496 |
},
|
| 4497 |
"model.layers.18.self_attn.k_proj": {
|
| 4498 |
"stored_tensors": {
|
|
@@ -4521,14 +4523,14 @@
|
|
| 4521 |
"shape": [
|
| 4522 |
320,
|
| 4523 |
64,
|
| 4524 |
-
|
| 4525 |
],
|
| 4526 |
-
"n_bytes":
|
| 4527 |
"dtype": "torch.int16"
|
| 4528 |
}
|
| 4529 |
},
|
| 4530 |
"quant_format": "exl3",
|
| 4531 |
-
"bits_per_weight":
|
| 4532 |
},
|
| 4533 |
"model.layers.18.self_attn.v_proj": {
|
| 4534 |
"stored_tensors": {
|
|
@@ -4586,14 +4588,14 @@
|
|
| 4586 |
"shape": [
|
| 4587 |
320,
|
| 4588 |
320,
|
| 4589 |
-
|
| 4590 |
],
|
| 4591 |
-
"n_bytes":
|
| 4592 |
"dtype": "torch.int16"
|
| 4593 |
}
|
| 4594 |
},
|
| 4595 |
"quant_format": "exl3",
|
| 4596 |
-
"bits_per_weight":
|
| 4597 |
},
|
| 4598 |
"model.layers.18.post_attention_layernorm": {
|
| 4599 |
"stored_tensors": {
|
|
@@ -4626,14 +4628,14 @@
|
|
| 4626 |
"shape": [
|
| 4627 |
320,
|
| 4628 |
1728,
|
| 4629 |
-
|
| 4630 |
],
|
| 4631 |
-
"n_bytes":
|
| 4632 |
"dtype": "torch.int16"
|
| 4633 |
}
|
| 4634 |
},
|
| 4635 |
"quant_format": "exl3",
|
| 4636 |
-
"bits_per_weight":
|
| 4637 |
},
|
| 4638 |
"model.layers.18.mlp.gate_proj": {
|
| 4639 |
"stored_tensors": {
|
|
@@ -4977,14 +4979,14 @@
|
|
| 4977 |
"shape": [
|
| 4978 |
320,
|
| 4979 |
320,
|
| 4980 |
-
|
| 4981 |
],
|
| 4982 |
-
"n_bytes":
|
| 4983 |
"dtype": "torch.int16"
|
| 4984 |
}
|
| 4985 |
},
|
| 4986 |
"quant_format": "exl3",
|
| 4987 |
-
"bits_per_weight":
|
| 4988 |
},
|
| 4989 |
"model.layers.20.self_attn.k_proj": {
|
| 4990 |
"stored_tensors": {
|
|
@@ -5078,14 +5080,14 @@
|
|
| 5078 |
"shape": [
|
| 5079 |
320,
|
| 5080 |
320,
|
| 5081 |
-
|
| 5082 |
],
|
| 5083 |
-
"n_bytes":
|
| 5084 |
"dtype": "torch.int16"
|
| 5085 |
}
|
| 5086 |
},
|
| 5087 |
"quant_format": "exl3",
|
| 5088 |
-
"bits_per_weight":
|
| 5089 |
},
|
| 5090 |
"model.layers.20.post_attention_layernorm": {
|
| 5091 |
"stored_tensors": {
|
|
@@ -5118,14 +5120,14 @@
|
|
| 5118 |
"shape": [
|
| 5119 |
320,
|
| 5120 |
1728,
|
| 5121 |
-
|
| 5122 |
],
|
| 5123 |
-
"n_bytes":
|
| 5124 |
"dtype": "torch.int16"
|
| 5125 |
}
|
| 5126 |
},
|
| 5127 |
"quant_format": "exl3",
|
| 5128 |
-
"bits_per_weight":
|
| 5129 |
},
|
| 5130 |
"model.layers.20.mlp.gate_proj": {
|
| 5131 |
"stored_tensors": {
|
|
@@ -5223,14 +5225,14 @@
|
|
| 5223 |
"shape": [
|
| 5224 |
320,
|
| 5225 |
320,
|
| 5226 |
-
|
| 5227 |
],
|
| 5228 |
-
"n_bytes":
|
| 5229 |
"dtype": "torch.int16"
|
| 5230 |
}
|
| 5231 |
},
|
| 5232 |
"quant_format": "exl3",
|
| 5233 |
-
"bits_per_weight":
|
| 5234 |
},
|
| 5235 |
"model.layers.21.self_attn.k_proj": {
|
| 5236 |
"stored_tensors": {
|
|
@@ -5259,14 +5261,14 @@
|
|
| 5259 |
"shape": [
|
| 5260 |
320,
|
| 5261 |
64,
|
| 5262 |
-
|
| 5263 |
],
|
| 5264 |
-
"n_bytes":
|
| 5265 |
"dtype": "torch.int16"
|
| 5266 |
}
|
| 5267 |
},
|
| 5268 |
"quant_format": "exl3",
|
| 5269 |
-
"bits_per_weight":
|
| 5270 |
},
|
| 5271 |
"model.layers.21.self_attn.v_proj": {
|
| 5272 |
"stored_tensors": {
|
|
@@ -5324,14 +5326,14 @@
|
|
| 5324 |
"shape": [
|
| 5325 |
320,
|
| 5326 |
320,
|
| 5327 |
-
|
| 5328 |
],
|
| 5329 |
-
"n_bytes":
|
| 5330 |
"dtype": "torch.int16"
|
| 5331 |
}
|
| 5332 |
},
|
| 5333 |
"quant_format": "exl3",
|
| 5334 |
-
"bits_per_weight":
|
| 5335 |
},
|
| 5336 |
"model.layers.21.post_attention_layernorm": {
|
| 5337 |
"stored_tensors": {
|
|
@@ -5364,14 +5366,14 @@
|
|
| 5364 |
"shape": [
|
| 5365 |
320,
|
| 5366 |
1728,
|
| 5367 |
-
|
| 5368 |
],
|
| 5369 |
-
"n_bytes":
|
| 5370 |
"dtype": "torch.int16"
|
| 5371 |
}
|
| 5372 |
},
|
| 5373 |
"quant_format": "exl3",
|
| 5374 |
-
"bits_per_weight":
|
| 5375 |
},
|
| 5376 |
"model.layers.21.mlp.gate_proj": {
|
| 5377 |
"stored_tensors": {
|
|
@@ -5751,14 +5753,14 @@
|
|
| 5751 |
"shape": [
|
| 5752 |
320,
|
| 5753 |
64,
|
| 5754 |
-
|
| 5755 |
],
|
| 5756 |
-
"n_bytes":
|
| 5757 |
"dtype": "torch.int16"
|
| 5758 |
}
|
| 5759 |
},
|
| 5760 |
"quant_format": "exl3",
|
| 5761 |
-
"bits_per_weight":
|
| 5762 |
},
|
| 5763 |
"model.layers.23.self_attn.v_proj": {
|
| 5764 |
"stored_tensors": {
|
|
@@ -5787,14 +5789,14 @@
|
|
| 5787 |
"shape": [
|
| 5788 |
320,
|
| 5789 |
64,
|
| 5790 |
-
|
| 5791 |
],
|
| 5792 |
-
"n_bytes":
|
| 5793 |
"dtype": "torch.int16"
|
| 5794 |
}
|
| 5795 |
},
|
| 5796 |
"quant_format": "exl3",
|
| 5797 |
-
"bits_per_weight":
|
| 5798 |
},
|
| 5799 |
"model.layers.23.self_attn.o_proj": {
|
| 5800 |
"stored_tensors": {
|
|
@@ -6489,14 +6491,14 @@
|
|
| 6489 |
"shape": [
|
| 6490 |
320,
|
| 6491 |
64,
|
| 6492 |
-
|
| 6493 |
],
|
| 6494 |
-
"n_bytes":
|
| 6495 |
"dtype": "torch.int16"
|
| 6496 |
}
|
| 6497 |
},
|
| 6498 |
"quant_format": "exl3",
|
| 6499 |
-
"bits_per_weight":
|
| 6500 |
},
|
| 6501 |
"model.layers.26.self_attn.v_proj": {
|
| 6502 |
"stored_tensors": {
|
|
@@ -6525,14 +6527,14 @@
|
|
| 6525 |
"shape": [
|
| 6526 |
320,
|
| 6527 |
64,
|
| 6528 |
-
|
| 6529 |
],
|
| 6530 |
-
"n_bytes":
|
| 6531 |
"dtype": "torch.int16"
|
| 6532 |
}
|
| 6533 |
},
|
| 6534 |
"quant_format": "exl3",
|
| 6535 |
-
"bits_per_weight":
|
| 6536 |
},
|
| 6537 |
"model.layers.26.self_attn.o_proj": {
|
| 6538 |
"stored_tensors": {
|
|
@@ -7227,14 +7229,14 @@
|
|
| 7227 |
"shape": [
|
| 7228 |
320,
|
| 7229 |
64,
|
| 7230 |
-
|
| 7231 |
],
|
| 7232 |
-
"n_bytes":
|
| 7233 |
"dtype": "torch.int16"
|
| 7234 |
}
|
| 7235 |
},
|
| 7236 |
"quant_format": "exl3",
|
| 7237 |
-
"bits_per_weight":
|
| 7238 |
},
|
| 7239 |
"model.layers.29.self_attn.v_proj": {
|
| 7240 |
"stored_tensors": {
|
|
@@ -7683,14 +7685,14 @@
|
|
| 7683 |
"shape": [
|
| 7684 |
320,
|
| 7685 |
320,
|
| 7686 |
-
|
| 7687 |
],
|
| 7688 |
-
"n_bytes":
|
| 7689 |
"dtype": "torch.int16"
|
| 7690 |
}
|
| 7691 |
},
|
| 7692 |
"quant_format": "exl3",
|
| 7693 |
-
"bits_per_weight":
|
| 7694 |
},
|
| 7695 |
"model.layers.31.self_attn.k_proj": {
|
| 7696 |
"stored_tensors": {
|
|
@@ -7719,14 +7721,14 @@
|
|
| 7719 |
"shape": [
|
| 7720 |
320,
|
| 7721 |
64,
|
| 7722 |
-
|
| 7723 |
],
|
| 7724 |
-
"n_bytes":
|
| 7725 |
"dtype": "torch.int16"
|
| 7726 |
}
|
| 7727 |
},
|
| 7728 |
"quant_format": "exl3",
|
| 7729 |
-
"bits_per_weight":
|
| 7730 |
},
|
| 7731 |
"model.layers.31.self_attn.v_proj": {
|
| 7732 |
"stored_tensors": {
|
|
@@ -7755,14 +7757,14 @@
|
|
| 7755 |
"shape": [
|
| 7756 |
320,
|
| 7757 |
64,
|
| 7758 |
-
|
| 7759 |
],
|
| 7760 |
-
"n_bytes":
|
| 7761 |
"dtype": "torch.int16"
|
| 7762 |
}
|
| 7763 |
},
|
| 7764 |
"quant_format": "exl3",
|
| 7765 |
-
"bits_per_weight":
|
| 7766 |
},
|
| 7767 |
"model.layers.31.self_attn.o_proj": {
|
| 7768 |
"stored_tensors": {
|
|
@@ -7784,14 +7786,14 @@
|
|
| 7784 |
"shape": [
|
| 7785 |
320,
|
| 7786 |
320,
|
| 7787 |
-
|
| 7788 |
],
|
| 7789 |
-
"n_bytes":
|
| 7790 |
"dtype": "torch.int16"
|
| 7791 |
}
|
| 7792 |
},
|
| 7793 |
"quant_format": "exl3",
|
| 7794 |
-
"bits_per_weight":
|
| 7795 |
},
|
| 7796 |
"model.layers.31.post_attention_layernorm": {
|
| 7797 |
"stored_tensors": {
|
|
@@ -7824,14 +7826,14 @@
|
|
| 7824 |
"shape": [
|
| 7825 |
320,
|
| 7826 |
1728,
|
| 7827 |
-
|
| 7828 |
],
|
| 7829 |
-
"n_bytes":
|
| 7830 |
"dtype": "torch.int16"
|
| 7831 |
}
|
| 7832 |
},
|
| 7833 |
"quant_format": "exl3",
|
| 7834 |
-
"bits_per_weight":
|
| 7835 |
},
|
| 7836 |
"model.layers.31.mlp.gate_proj": {
|
| 7837 |
"stored_tensors": {
|
|
@@ -7929,14 +7931,14 @@
|
|
| 7929 |
"shape": [
|
| 7930 |
320,
|
| 7931 |
320,
|
| 7932 |
-
|
| 7933 |
],
|
| 7934 |
-
"n_bytes":
|
| 7935 |
"dtype": "torch.int16"
|
| 7936 |
}
|
| 7937 |
},
|
| 7938 |
"quant_format": "exl3",
|
| 7939 |
-
"bits_per_weight":
|
| 7940 |
},
|
| 7941 |
"model.layers.32.self_attn.k_proj": {
|
| 7942 |
"stored_tensors": {
|
|
@@ -8030,14 +8032,14 @@
|
|
| 8030 |
"shape": [
|
| 8031 |
320,
|
| 8032 |
320,
|
| 8033 |
-
|
| 8034 |
],
|
| 8035 |
-
"n_bytes":
|
| 8036 |
"dtype": "torch.int16"
|
| 8037 |
}
|
| 8038 |
},
|
| 8039 |
"quant_format": "exl3",
|
| 8040 |
-
"bits_per_weight":
|
| 8041 |
},
|
| 8042 |
"model.layers.32.post_attention_layernorm": {
|
| 8043 |
"stored_tensors": {
|
|
@@ -8070,14 +8072,14 @@
|
|
| 8070 |
"shape": [
|
| 8071 |
320,
|
| 8072 |
1728,
|
| 8073 |
-
|
| 8074 |
],
|
| 8075 |
-
"n_bytes":
|
| 8076 |
"dtype": "torch.int16"
|
| 8077 |
}
|
| 8078 |
},
|
| 8079 |
"quant_format": "exl3",
|
| 8080 |
-
"bits_per_weight":
|
| 8081 |
},
|
| 8082 |
"model.layers.32.mlp.gate_proj": {
|
| 8083 |
"stored_tensors": {
|
|
@@ -8421,14 +8423,14 @@
|
|
| 8421 |
"shape": [
|
| 8422 |
320,
|
| 8423 |
320,
|
| 8424 |
-
|
| 8425 |
],
|
| 8426 |
-
"n_bytes":
|
| 8427 |
"dtype": "torch.int16"
|
| 8428 |
}
|
| 8429 |
},
|
| 8430 |
"quant_format": "exl3",
|
| 8431 |
-
"bits_per_weight":
|
| 8432 |
},
|
| 8433 |
"model.layers.34.self_attn.k_proj": {
|
| 8434 |
"stored_tensors": {
|
|
@@ -8457,14 +8459,14 @@
|
|
| 8457 |
"shape": [
|
| 8458 |
320,
|
| 8459 |
64,
|
| 8460 |
-
|
| 8461 |
],
|
| 8462 |
-
"n_bytes":
|
| 8463 |
"dtype": "torch.int16"
|
| 8464 |
}
|
| 8465 |
},
|
| 8466 |
"quant_format": "exl3",
|
| 8467 |
-
"bits_per_weight":
|
| 8468 |
},
|
| 8469 |
"model.layers.34.self_attn.v_proj": {
|
| 8470 |
"stored_tensors": {
|
|
@@ -8522,14 +8524,14 @@
|
|
| 8522 |
"shape": [
|
| 8523 |
320,
|
| 8524 |
320,
|
| 8525 |
-
|
| 8526 |
],
|
| 8527 |
-
"n_bytes":
|
| 8528 |
"dtype": "torch.int16"
|
| 8529 |
}
|
| 8530 |
},
|
| 8531 |
"quant_format": "exl3",
|
| 8532 |
-
"bits_per_weight":
|
| 8533 |
},
|
| 8534 |
"model.layers.34.post_attention_layernorm": {
|
| 8535 |
"stored_tensors": {
|
|
@@ -8562,14 +8564,14 @@
|
|
| 8562 |
"shape": [
|
| 8563 |
320,
|
| 8564 |
1728,
|
| 8565 |
-
|
| 8566 |
],
|
| 8567 |
-
"n_bytes":
|
| 8568 |
"dtype": "torch.int16"
|
| 8569 |
}
|
| 8570 |
},
|
| 8571 |
"quant_format": "exl3",
|
| 8572 |
-
"bits_per_weight":
|
| 8573 |
},
|
| 8574 |
"model.layers.34.mlp.gate_proj": {
|
| 8575 |
"stored_tensors": {
|
|
@@ -8667,14 +8669,14 @@
|
|
| 8667 |
"shape": [
|
| 8668 |
320,
|
| 8669 |
320,
|
| 8670 |
-
|
| 8671 |
],
|
| 8672 |
-
"n_bytes":
|
| 8673 |
"dtype": "torch.int16"
|
| 8674 |
}
|
| 8675 |
},
|
| 8676 |
"quant_format": "exl3",
|
| 8677 |
-
"bits_per_weight":
|
| 8678 |
},
|
| 8679 |
"model.layers.35.self_attn.k_proj": {
|
| 8680 |
"stored_tensors": {
|
|
@@ -8768,14 +8770,14 @@
|
|
| 8768 |
"shape": [
|
| 8769 |
320,
|
| 8770 |
320,
|
| 8771 |
-
|
| 8772 |
],
|
| 8773 |
-
"n_bytes":
|
| 8774 |
"dtype": "torch.int16"
|
| 8775 |
}
|
| 8776 |
},
|
| 8777 |
"quant_format": "exl3",
|
| 8778 |
-
"bits_per_weight":
|
| 8779 |
},
|
| 8780 |
"model.layers.35.post_attention_layernorm": {
|
| 8781 |
"stored_tensors": {
|
|
@@ -8808,14 +8810,14 @@
|
|
| 8808 |
"shape": [
|
| 8809 |
320,
|
| 8810 |
1728,
|
| 8811 |
-
|
| 8812 |
],
|
| 8813 |
-
"n_bytes":
|
| 8814 |
"dtype": "torch.int16"
|
| 8815 |
}
|
| 8816 |
},
|
| 8817 |
"quant_format": "exl3",
|
| 8818 |
-
"bits_per_weight":
|
| 8819 |
},
|
| 8820 |
"model.layers.35.mlp.gate_proj": {
|
| 8821 |
"stored_tensors": {
|
|
@@ -9159,14 +9161,14 @@
|
|
| 9159 |
"shape": [
|
| 9160 |
320,
|
| 9161 |
320,
|
| 9162 |
-
|
| 9163 |
],
|
| 9164 |
-
"n_bytes":
|
| 9165 |
"dtype": "torch.int16"
|
| 9166 |
}
|
| 9167 |
},
|
| 9168 |
"quant_format": "exl3",
|
| 9169 |
-
"bits_per_weight":
|
| 9170 |
},
|
| 9171 |
"model.layers.37.self_attn.k_proj": {
|
| 9172 |
"stored_tensors": {
|
|
@@ -9195,14 +9197,14 @@
|
|
| 9195 |
"shape": [
|
| 9196 |
320,
|
| 9197 |
64,
|
| 9198 |
-
|
| 9199 |
],
|
| 9200 |
-
"n_bytes":
|
| 9201 |
"dtype": "torch.int16"
|
| 9202 |
}
|
| 9203 |
},
|
| 9204 |
"quant_format": "exl3",
|
| 9205 |
-
"bits_per_weight":
|
| 9206 |
},
|
| 9207 |
"model.layers.37.self_attn.v_proj": {
|
| 9208 |
"stored_tensors": {
|
|
@@ -9260,14 +9262,14 @@
|
|
| 9260 |
"shape": [
|
| 9261 |
320,
|
| 9262 |
320,
|
| 9263 |
-
|
| 9264 |
],
|
| 9265 |
-
"n_bytes":
|
| 9266 |
"dtype": "torch.int16"
|
| 9267 |
}
|
| 9268 |
},
|
| 9269 |
"quant_format": "exl3",
|
| 9270 |
-
"bits_per_weight":
|
| 9271 |
},
|
| 9272 |
"model.layers.37.post_attention_layernorm": {
|
| 9273 |
"stored_tensors": {
|
|
@@ -9300,14 +9302,14 @@
|
|
| 9300 |
"shape": [
|
| 9301 |
320,
|
| 9302 |
1728,
|
| 9303 |
-
|
| 9304 |
],
|
| 9305 |
-
"n_bytes":
|
| 9306 |
"dtype": "torch.int16"
|
| 9307 |
}
|
| 9308 |
},
|
| 9309 |
"quant_format": "exl3",
|
| 9310 |
-
"bits_per_weight":
|
| 9311 |
},
|
| 9312 |
"model.layers.37.mlp.gate_proj": {
|
| 9313 |
"stored_tensors": {
|
|
@@ -9405,14 +9407,14 @@
|
|
| 9405 |
"shape": [
|
| 9406 |
320,
|
| 9407 |
320,
|
| 9408 |
-
|
| 9409 |
],
|
| 9410 |
-
"n_bytes":
|
| 9411 |
"dtype": "torch.int16"
|
| 9412 |
}
|
| 9413 |
},
|
| 9414 |
"quant_format": "exl3",
|
| 9415 |
-
"bits_per_weight":
|
| 9416 |
},
|
| 9417 |
"model.layers.38.self_attn.k_proj": {
|
| 9418 |
"stored_tensors": {
|
|
@@ -9441,14 +9443,14 @@
|
|
| 9441 |
"shape": [
|
| 9442 |
320,
|
| 9443 |
64,
|
| 9444 |
-
|
| 9445 |
],
|
| 9446 |
-
"n_bytes":
|
| 9447 |
"dtype": "torch.int16"
|
| 9448 |
}
|
| 9449 |
},
|
| 9450 |
"quant_format": "exl3",
|
| 9451 |
-
"bits_per_weight":
|
| 9452 |
},
|
| 9453 |
"model.layers.38.self_attn.v_proj": {
|
| 9454 |
"stored_tensors": {
|
|
@@ -9477,14 +9479,14 @@
|
|
| 9477 |
"shape": [
|
| 9478 |
320,
|
| 9479 |
64,
|
| 9480 |
-
|
| 9481 |
],
|
| 9482 |
-
"n_bytes":
|
| 9483 |
"dtype": "torch.int16"
|
| 9484 |
}
|
| 9485 |
},
|
| 9486 |
"quant_format": "exl3",
|
| 9487 |
-
"bits_per_weight":
|
| 9488 |
},
|
| 9489 |
"model.layers.38.self_attn.o_proj": {
|
| 9490 |
"stored_tensors": {
|
|
@@ -9506,14 +9508,14 @@
|
|
| 9506 |
"shape": [
|
| 9507 |
320,
|
| 9508 |
320,
|
| 9509 |
-
|
| 9510 |
],
|
| 9511 |
-
"n_bytes":
|
| 9512 |
"dtype": "torch.int16"
|
| 9513 |
}
|
| 9514 |
},
|
| 9515 |
"quant_format": "exl3",
|
| 9516 |
-
"bits_per_weight":
|
| 9517 |
},
|
| 9518 |
"model.layers.38.post_attention_layernorm": {
|
| 9519 |
"stored_tensors": {
|
|
@@ -9546,14 +9548,14 @@
|
|
| 9546 |
"shape": [
|
| 9547 |
320,
|
| 9548 |
1728,
|
| 9549 |
-
|
| 9550 |
],
|
| 9551 |
-
"n_bytes":
|
| 9552 |
"dtype": "torch.int16"
|
| 9553 |
}
|
| 9554 |
},
|
| 9555 |
"quant_format": "exl3",
|
| 9556 |
-
"bits_per_weight":
|
| 9557 |
},
|
| 9558 |
"model.layers.38.mlp.gate_proj": {
|
| 9559 |
"stored_tensors": {
|
|
@@ -9651,14 +9653,14 @@
|
|
| 9651 |
"shape": [
|
| 9652 |
320,
|
| 9653 |
320,
|
| 9654 |
-
|
| 9655 |
],
|
| 9656 |
-
"n_bytes":
|
| 9657 |
"dtype": "torch.int16"
|
| 9658 |
}
|
| 9659 |
},
|
| 9660 |
"quant_format": "exl3",
|
| 9661 |
-
"bits_per_weight":
|
| 9662 |
},
|
| 9663 |
"model.layers.39.self_attn.k_proj": {
|
| 9664 |
"stored_tensors": {
|
|
@@ -9687,14 +9689,14 @@
|
|
| 9687 |
"shape": [
|
| 9688 |
320,
|
| 9689 |
64,
|
| 9690 |
-
|
| 9691 |
],
|
| 9692 |
-
"n_bytes":
|
| 9693 |
"dtype": "torch.int16"
|
| 9694 |
}
|
| 9695 |
},
|
| 9696 |
"quant_format": "exl3",
|
| 9697 |
-
"bits_per_weight":
|
| 9698 |
},
|
| 9699 |
"model.layers.39.self_attn.v_proj": {
|
| 9700 |
"stored_tensors": {
|
|
@@ -9723,14 +9725,14 @@
|
|
| 9723 |
"shape": [
|
| 9724 |
320,
|
| 9725 |
64,
|
| 9726 |
-
|
| 9727 |
],
|
| 9728 |
-
"n_bytes":
|
| 9729 |
"dtype": "torch.int16"
|
| 9730 |
}
|
| 9731 |
},
|
| 9732 |
"quant_format": "exl3",
|
| 9733 |
-
"bits_per_weight":
|
| 9734 |
},
|
| 9735 |
"model.layers.39.self_attn.o_proj": {
|
| 9736 |
"stored_tensors": {
|
|
@@ -9752,14 +9754,14 @@
|
|
| 9752 |
"shape": [
|
| 9753 |
320,
|
| 9754 |
320,
|
| 9755 |
-
|
| 9756 |
],
|
| 9757 |
-
"n_bytes":
|
| 9758 |
"dtype": "torch.int16"
|
| 9759 |
}
|
| 9760 |
},
|
| 9761 |
"quant_format": "exl3",
|
| 9762 |
-
"bits_per_weight":
|
| 9763 |
},
|
| 9764 |
"model.layers.39.post_attention_layernorm": {
|
| 9765 |
"stored_tensors": {
|
|
@@ -9792,14 +9794,14 @@
|
|
| 9792 |
"shape": [
|
| 9793 |
320,
|
| 9794 |
1728,
|
| 9795 |
-
|
| 9796 |
],
|
| 9797 |
-
"n_bytes":
|
| 9798 |
"dtype": "torch.int16"
|
| 9799 |
}
|
| 9800 |
},
|
| 9801 |
"quant_format": "exl3",
|
| 9802 |
-
"bits_per_weight":
|
| 9803 |
},
|
| 9804 |
"model.layers.39.mlp.gate_proj": {
|
| 9805 |
"stored_tensors": {
|
|
@@ -10143,14 +10145,14 @@
|
|
| 10143 |
"shape": [
|
| 10144 |
320,
|
| 10145 |
320,
|
| 10146 |
-
|
| 10147 |
],
|
| 10148 |
-
"n_bytes":
|
| 10149 |
"dtype": "torch.int16"
|
| 10150 |
}
|
| 10151 |
},
|
| 10152 |
"quant_format": "exl3",
|
| 10153 |
-
"bits_per_weight":
|
| 10154 |
},
|
| 10155 |
"model.layers.41.self_attn.k_proj": {
|
| 10156 |
"stored_tensors": {
|
|
@@ -10244,14 +10246,14 @@
|
|
| 10244 |
"shape": [
|
| 10245 |
320,
|
| 10246 |
320,
|
| 10247 |
-
|
| 10248 |
],
|
| 10249 |
-
"n_bytes":
|
| 10250 |
"dtype": "torch.int16"
|
| 10251 |
}
|
| 10252 |
},
|
| 10253 |
"quant_format": "exl3",
|
| 10254 |
-
"bits_per_weight":
|
| 10255 |
},
|
| 10256 |
"model.layers.41.post_attention_layernorm": {
|
| 10257 |
"stored_tensors": {
|
|
@@ -10284,14 +10286,14 @@
|
|
| 10284 |
"shape": [
|
| 10285 |
320,
|
| 10286 |
1728,
|
| 10287 |
-
|
| 10288 |
],
|
| 10289 |
-
"n_bytes":
|
| 10290 |
"dtype": "torch.int16"
|
| 10291 |
}
|
| 10292 |
},
|
| 10293 |
"quant_format": "exl3",
|
| 10294 |
-
"bits_per_weight":
|
| 10295 |
},
|
| 10296 |
"model.layers.41.mlp.gate_proj": {
|
| 10297 |
"stored_tensors": {
|
|
@@ -10389,14 +10391,14 @@
|
|
| 10389 |
"shape": [
|
| 10390 |
320,
|
| 10391 |
320,
|
| 10392 |
-
|
| 10393 |
],
|
| 10394 |
-
"n_bytes":
|
| 10395 |
"dtype": "torch.int16"
|
| 10396 |
}
|
| 10397 |
},
|
| 10398 |
"quant_format": "exl3",
|
| 10399 |
-
"bits_per_weight":
|
| 10400 |
},
|
| 10401 |
"model.layers.42.self_attn.k_proj": {
|
| 10402 |
"stored_tensors": {
|
|
@@ -10425,14 +10427,14 @@
|
|
| 10425 |
"shape": [
|
| 10426 |
320,
|
| 10427 |
64,
|
| 10428 |
-
|
| 10429 |
],
|
| 10430 |
-
"n_bytes":
|
| 10431 |
"dtype": "torch.int16"
|
| 10432 |
}
|
| 10433 |
},
|
| 10434 |
"quant_format": "exl3",
|
| 10435 |
-
"bits_per_weight":
|
| 10436 |
},
|
| 10437 |
"model.layers.42.self_attn.v_proj": {
|
| 10438 |
"stored_tensors": {
|
|
@@ -10490,14 +10492,14 @@
|
|
| 10490 |
"shape": [
|
| 10491 |
320,
|
| 10492 |
320,
|
| 10493 |
-
|
| 10494 |
],
|
| 10495 |
-
"n_bytes":
|
| 10496 |
"dtype": "torch.int16"
|
| 10497 |
}
|
| 10498 |
},
|
| 10499 |
"quant_format": "exl3",
|
| 10500 |
-
"bits_per_weight":
|
| 10501 |
},
|
| 10502 |
"model.layers.42.post_attention_layernorm": {
|
| 10503 |
"stored_tensors": {
|
|
@@ -10530,14 +10532,14 @@
|
|
| 10530 |
"shape": [
|
| 10531 |
320,
|
| 10532 |
1728,
|
| 10533 |
-
|
| 10534 |
],
|
| 10535 |
-
"n_bytes":
|
| 10536 |
"dtype": "torch.int16"
|
| 10537 |
}
|
| 10538 |
},
|
| 10539 |
"quant_format": "exl3",
|
| 10540 |
-
"bits_per_weight":
|
| 10541 |
},
|
| 10542 |
"model.layers.42.mlp.gate_proj": {
|
| 10543 |
"stored_tensors": {
|
|
@@ -10881,14 +10883,14 @@
|
|
| 10881 |
"shape": [
|
| 10882 |
320,
|
| 10883 |
320,
|
| 10884 |
-
|
| 10885 |
],
|
| 10886 |
-
"n_bytes":
|
| 10887 |
"dtype": "torch.int16"
|
| 10888 |
}
|
| 10889 |
},
|
| 10890 |
"quant_format": "exl3",
|
| 10891 |
-
"bits_per_weight":
|
| 10892 |
},
|
| 10893 |
"model.layers.44.self_attn.k_proj": {
|
| 10894 |
"stored_tensors": {
|
|
@@ -10982,14 +10984,14 @@
|
|
| 10982 |
"shape": [
|
| 10983 |
320,
|
| 10984 |
320,
|
| 10985 |
-
|
| 10986 |
],
|
| 10987 |
-
"n_bytes":
|
| 10988 |
"dtype": "torch.int16"
|
| 10989 |
}
|
| 10990 |
},
|
| 10991 |
"quant_format": "exl3",
|
| 10992 |
-
"bits_per_weight":
|
| 10993 |
},
|
| 10994 |
"model.layers.44.post_attention_layernorm": {
|
| 10995 |
"stored_tensors": {
|
|
@@ -11022,14 +11024,14 @@
|
|
| 11022 |
"shape": [
|
| 11023 |
320,
|
| 11024 |
1728,
|
| 11025 |
-
|
| 11026 |
],
|
| 11027 |
-
"n_bytes":
|
| 11028 |
"dtype": "torch.int16"
|
| 11029 |
}
|
| 11030 |
},
|
| 11031 |
"quant_format": "exl3",
|
| 11032 |
-
"bits_per_weight":
|
| 11033 |
},
|
| 11034 |
"model.layers.44.mlp.gate_proj": {
|
| 11035 |
"stored_tensors": {
|
|
@@ -11127,14 +11129,14 @@
|
|
| 11127 |
"shape": [
|
| 11128 |
320,
|
| 11129 |
320,
|
| 11130 |
-
|
| 11131 |
],
|
| 11132 |
-
"n_bytes":
|
| 11133 |
"dtype": "torch.int16"
|
| 11134 |
}
|
| 11135 |
},
|
| 11136 |
"quant_format": "exl3",
|
| 11137 |
-
"bits_per_weight":
|
| 11138 |
},
|
| 11139 |
"model.layers.45.self_attn.k_proj": {
|
| 11140 |
"stored_tensors": {
|
|
@@ -11163,14 +11165,14 @@
|
|
| 11163 |
"shape": [
|
| 11164 |
320,
|
| 11165 |
64,
|
| 11166 |
-
|
| 11167 |
],
|
| 11168 |
-
"n_bytes":
|
| 11169 |
"dtype": "torch.int16"
|
| 11170 |
}
|
| 11171 |
},
|
| 11172 |
"quant_format": "exl3",
|
| 11173 |
-
"bits_per_weight":
|
| 11174 |
},
|
| 11175 |
"model.layers.45.self_attn.v_proj": {
|
| 11176 |
"stored_tensors": {
|
|
@@ -11228,14 +11230,14 @@
|
|
| 11228 |
"shape": [
|
| 11229 |
320,
|
| 11230 |
320,
|
| 11231 |
-
|
| 11232 |
],
|
| 11233 |
-
"n_bytes":
|
| 11234 |
"dtype": "torch.int16"
|
| 11235 |
}
|
| 11236 |
},
|
| 11237 |
"quant_format": "exl3",
|
| 11238 |
-
"bits_per_weight":
|
| 11239 |
},
|
| 11240 |
"model.layers.45.post_attention_layernorm": {
|
| 11241 |
"stored_tensors": {
|
|
@@ -11268,14 +11270,14 @@
|
|
| 11268 |
"shape": [
|
| 11269 |
320,
|
| 11270 |
1728,
|
| 11271 |
-
|
| 11272 |
],
|
| 11273 |
-
"n_bytes":
|
| 11274 |
"dtype": "torch.int16"
|
| 11275 |
}
|
| 11276 |
},
|
| 11277 |
"quant_format": "exl3",
|
| 11278 |
-
"bits_per_weight":
|
| 11279 |
},
|
| 11280 |
"model.layers.45.mlp.gate_proj": {
|
| 11281 |
"stored_tensors": {
|
|
@@ -11655,14 +11657,14 @@
|
|
| 11655 |
"shape": [
|
| 11656 |
320,
|
| 11657 |
64,
|
| 11658 |
-
|
| 11659 |
],
|
| 11660 |
-
"n_bytes":
|
| 11661 |
"dtype": "torch.int16"
|
| 11662 |
}
|
| 11663 |
},
|
| 11664 |
"quant_format": "exl3",
|
| 11665 |
-
"bits_per_weight":
|
| 11666 |
},
|
| 11667 |
"model.layers.47.self_attn.v_proj": {
|
| 11668 |
"stored_tensors": {
|
|
@@ -11691,14 +11693,14 @@
|
|
| 11691 |
"shape": [
|
| 11692 |
320,
|
| 11693 |
64,
|
| 11694 |
-
|
| 11695 |
],
|
| 11696 |
-
"n_bytes":
|
| 11697 |
"dtype": "torch.int16"
|
| 11698 |
}
|
| 11699 |
},
|
| 11700 |
"quant_format": "exl3",
|
| 11701 |
-
"bits_per_weight":
|
| 11702 |
},
|
| 11703 |
"model.layers.47.self_attn.o_proj": {
|
| 11704 |
"stored_tensors": {
|
|
@@ -12393,14 +12395,14 @@
|
|
| 12393 |
"shape": [
|
| 12394 |
320,
|
| 12395 |
64,
|
| 12396 |
-
|
| 12397 |
],
|
| 12398 |
-
"n_bytes":
|
| 12399 |
"dtype": "torch.int16"
|
| 12400 |
}
|
| 12401 |
},
|
| 12402 |
"quant_format": "exl3",
|
| 12403 |
-
"bits_per_weight":
|
| 12404 |
},
|
| 12405 |
"model.layers.50.self_attn.v_proj": {
|
| 12406 |
"stored_tensors": {
|
|
@@ -12429,14 +12431,14 @@
|
|
| 12429 |
"shape": [
|
| 12430 |
320,
|
| 12431 |
64,
|
| 12432 |
-
|
| 12433 |
],
|
| 12434 |
-
"n_bytes":
|
| 12435 |
"dtype": "torch.int16"
|
| 12436 |
}
|
| 12437 |
},
|
| 12438 |
"quant_format": "exl3",
|
| 12439 |
-
"bits_per_weight":
|
| 12440 |
},
|
| 12441 |
"model.layers.50.self_attn.o_proj": {
|
| 12442 |
"stored_tensors": {
|
|
@@ -13131,14 +13133,14 @@
|
|
| 13131 |
"shape": [
|
| 13132 |
320,
|
| 13133 |
64,
|
| 13134 |
-
|
| 13135 |
],
|
| 13136 |
-
"n_bytes":
|
| 13137 |
"dtype": "torch.int16"
|
| 13138 |
}
|
| 13139 |
},
|
| 13140 |
"quant_format": "exl3",
|
| 13141 |
-
"bits_per_weight":
|
| 13142 |
},
|
| 13143 |
"model.layers.53.self_attn.v_proj": {
|
| 13144 |
"stored_tensors": {
|
|
@@ -13587,14 +13589,14 @@
|
|
| 13587 |
"shape": [
|
| 13588 |
320,
|
| 13589 |
320,
|
| 13590 |
-
|
| 13591 |
],
|
| 13592 |
-
"n_bytes":
|
| 13593 |
"dtype": "torch.int16"
|
| 13594 |
}
|
| 13595 |
},
|
| 13596 |
"quant_format": "exl3",
|
| 13597 |
-
"bits_per_weight":
|
| 13598 |
},
|
| 13599 |
"model.layers.55.self_attn.k_proj": {
|
| 13600 |
"stored_tensors": {
|
|
@@ -13623,14 +13625,14 @@
|
|
| 13623 |
"shape": [
|
| 13624 |
320,
|
| 13625 |
64,
|
| 13626 |
-
|
| 13627 |
],
|
| 13628 |
-
"n_bytes":
|
| 13629 |
"dtype": "torch.int16"
|
| 13630 |
}
|
| 13631 |
},
|
| 13632 |
"quant_format": "exl3",
|
| 13633 |
-
"bits_per_weight":
|
| 13634 |
},
|
| 13635 |
"model.layers.55.self_attn.v_proj": {
|
| 13636 |
"stored_tensors": {
|
|
@@ -13659,14 +13661,14 @@
|
|
| 13659 |
"shape": [
|
| 13660 |
320,
|
| 13661 |
64,
|
| 13662 |
-
|
| 13663 |
],
|
| 13664 |
-
"n_bytes":
|
| 13665 |
"dtype": "torch.int16"
|
| 13666 |
}
|
| 13667 |
},
|
| 13668 |
"quant_format": "exl3",
|
| 13669 |
-
"bits_per_weight":
|
| 13670 |
},
|
| 13671 |
"model.layers.55.self_attn.o_proj": {
|
| 13672 |
"stored_tensors": {
|
|
@@ -13688,14 +13690,14 @@
|
|
| 13688 |
"shape": [
|
| 13689 |
320,
|
| 13690 |
320,
|
| 13691 |
-
|
| 13692 |
],
|
| 13693 |
-
"n_bytes":
|
| 13694 |
"dtype": "torch.int16"
|
| 13695 |
}
|
| 13696 |
},
|
| 13697 |
"quant_format": "exl3",
|
| 13698 |
-
"bits_per_weight":
|
| 13699 |
},
|
| 13700 |
"model.layers.55.post_attention_layernorm": {
|
| 13701 |
"stored_tensors": {
|
|
@@ -13728,14 +13730,14 @@
|
|
| 13728 |
"shape": [
|
| 13729 |
320,
|
| 13730 |
1728,
|
| 13731 |
-
|
| 13732 |
],
|
| 13733 |
-
"n_bytes":
|
| 13734 |
"dtype": "torch.int16"
|
| 13735 |
}
|
| 13736 |
},
|
| 13737 |
"quant_format": "exl3",
|
| 13738 |
-
"bits_per_weight":
|
| 13739 |
},
|
| 13740 |
"model.layers.55.mlp.gate_proj": {
|
| 13741 |
"stored_tensors": {
|
|
@@ -13833,14 +13835,14 @@
|
|
| 13833 |
"shape": [
|
| 13834 |
320,
|
| 13835 |
320,
|
| 13836 |
-
|
| 13837 |
],
|
| 13838 |
-
"n_bytes":
|
| 13839 |
"dtype": "torch.int16"
|
| 13840 |
}
|
| 13841 |
},
|
| 13842 |
"quant_format": "exl3",
|
| 13843 |
-
"bits_per_weight":
|
| 13844 |
},
|
| 13845 |
"model.layers.56.self_attn.k_proj": {
|
| 13846 |
"stored_tensors": {
|
|
@@ -13934,14 +13936,14 @@
|
|
| 13934 |
"shape": [
|
| 13935 |
320,
|
| 13936 |
320,
|
| 13937 |
-
|
| 13938 |
],
|
| 13939 |
-
"n_bytes":
|
| 13940 |
"dtype": "torch.int16"
|
| 13941 |
}
|
| 13942 |
},
|
| 13943 |
"quant_format": "exl3",
|
| 13944 |
-
"bits_per_weight":
|
| 13945 |
},
|
| 13946 |
"model.layers.56.post_attention_layernorm": {
|
| 13947 |
"stored_tensors": {
|
|
@@ -13974,14 +13976,14 @@
|
|
| 13974 |
"shape": [
|
| 13975 |
320,
|
| 13976 |
1728,
|
| 13977 |
-
|
| 13978 |
],
|
| 13979 |
-
"n_bytes":
|
| 13980 |
"dtype": "torch.int16"
|
| 13981 |
}
|
| 13982 |
},
|
| 13983 |
"quant_format": "exl3",
|
| 13984 |
-
"bits_per_weight":
|
| 13985 |
},
|
| 13986 |
"model.layers.56.mlp.gate_proj": {
|
| 13987 |
"stored_tensors": {
|
|
@@ -14325,14 +14327,14 @@
|
|
| 14325 |
"shape": [
|
| 14326 |
320,
|
| 14327 |
320,
|
| 14328 |
-
|
| 14329 |
],
|
| 14330 |
-
"n_bytes":
|
| 14331 |
"dtype": "torch.int16"
|
| 14332 |
}
|
| 14333 |
},
|
| 14334 |
"quant_format": "exl3",
|
| 14335 |
-
"bits_per_weight":
|
| 14336 |
},
|
| 14337 |
"model.layers.58.self_attn.k_proj": {
|
| 14338 |
"stored_tensors": {
|
|
@@ -14361,14 +14363,14 @@
|
|
| 14361 |
"shape": [
|
| 14362 |
320,
|
| 14363 |
64,
|
| 14364 |
-
|
| 14365 |
],
|
| 14366 |
-
"n_bytes":
|
| 14367 |
"dtype": "torch.int16"
|
| 14368 |
}
|
| 14369 |
},
|
| 14370 |
"quant_format": "exl3",
|
| 14371 |
-
"bits_per_weight":
|
| 14372 |
},
|
| 14373 |
"model.layers.58.self_attn.v_proj": {
|
| 14374 |
"stored_tensors": {
|
|
@@ -14426,14 +14428,14 @@
|
|
| 14426 |
"shape": [
|
| 14427 |
320,
|
| 14428 |
320,
|
| 14429 |
-
|
| 14430 |
],
|
| 14431 |
-
"n_bytes":
|
| 14432 |
"dtype": "torch.int16"
|
| 14433 |
}
|
| 14434 |
},
|
| 14435 |
"quant_format": "exl3",
|
| 14436 |
-
"bits_per_weight":
|
| 14437 |
},
|
| 14438 |
"model.layers.58.post_attention_layernorm": {
|
| 14439 |
"stored_tensors": {
|
|
@@ -14466,14 +14468,14 @@
|
|
| 14466 |
"shape": [
|
| 14467 |
320,
|
| 14468 |
1728,
|
| 14469 |
-
|
| 14470 |
],
|
| 14471 |
-
"n_bytes":
|
| 14472 |
"dtype": "torch.int16"
|
| 14473 |
}
|
| 14474 |
},
|
| 14475 |
"quant_format": "exl3",
|
| 14476 |
-
"bits_per_weight":
|
| 14477 |
},
|
| 14478 |
"model.layers.58.mlp.gate_proj": {
|
| 14479 |
"stored_tensors": {
|
|
@@ -14571,14 +14573,14 @@
|
|
| 14571 |
"shape": [
|
| 14572 |
320,
|
| 14573 |
320,
|
| 14574 |
-
|
| 14575 |
],
|
| 14576 |
-
"n_bytes":
|
| 14577 |
"dtype": "torch.int16"
|
| 14578 |
}
|
| 14579 |
},
|
| 14580 |
"quant_format": "exl3",
|
| 14581 |
-
"bits_per_weight":
|
| 14582 |
},
|
| 14583 |
"model.layers.59.self_attn.k_proj": {
|
| 14584 |
"stored_tensors": {
|
|
@@ -14672,14 +14674,14 @@
|
|
| 14672 |
"shape": [
|
| 14673 |
320,
|
| 14674 |
320,
|
| 14675 |
-
|
| 14676 |
],
|
| 14677 |
-
"n_bytes":
|
| 14678 |
"dtype": "torch.int16"
|
| 14679 |
}
|
| 14680 |
},
|
| 14681 |
"quant_format": "exl3",
|
| 14682 |
-
"bits_per_weight":
|
| 14683 |
},
|
| 14684 |
"model.layers.59.post_attention_layernorm": {
|
| 14685 |
"stored_tensors": {
|
|
@@ -14712,14 +14714,14 @@
|
|
| 14712 |
"shape": [
|
| 14713 |
320,
|
| 14714 |
1728,
|
| 14715 |
-
|
| 14716 |
],
|
| 14717 |
-
"n_bytes":
|
| 14718 |
"dtype": "torch.int16"
|
| 14719 |
}
|
| 14720 |
},
|
| 14721 |
"quant_format": "exl3",
|
| 14722 |
-
"bits_per_weight":
|
| 14723 |
},
|
| 14724 |
"model.layers.59.mlp.gate_proj": {
|
| 14725 |
"stored_tensors": {
|
|
@@ -15063,14 +15065,14 @@
|
|
| 15063 |
"shape": [
|
| 15064 |
320,
|
| 15065 |
320,
|
| 15066 |
-
|
| 15067 |
],
|
| 15068 |
-
"n_bytes":
|
| 15069 |
"dtype": "torch.int16"
|
| 15070 |
}
|
| 15071 |
},
|
| 15072 |
"quant_format": "exl3",
|
| 15073 |
-
"bits_per_weight":
|
| 15074 |
},
|
| 15075 |
"model.layers.61.self_attn.k_proj": {
|
| 15076 |
"stored_tensors": {
|
|
@@ -15099,14 +15101,14 @@
|
|
| 15099 |
"shape": [
|
| 15100 |
320,
|
| 15101 |
64,
|
| 15102 |
-
|
| 15103 |
],
|
| 15104 |
-
"n_bytes":
|
| 15105 |
"dtype": "torch.int16"
|
| 15106 |
}
|
| 15107 |
},
|
| 15108 |
"quant_format": "exl3",
|
| 15109 |
-
"bits_per_weight":
|
| 15110 |
},
|
| 15111 |
"model.layers.61.self_attn.v_proj": {
|
| 15112 |
"stored_tensors": {
|
|
@@ -15164,14 +15166,14 @@
|
|
| 15164 |
"shape": [
|
| 15165 |
320,
|
| 15166 |
320,
|
| 15167 |
-
|
| 15168 |
],
|
| 15169 |
-
"n_bytes":
|
| 15170 |
"dtype": "torch.int16"
|
| 15171 |
}
|
| 15172 |
},
|
| 15173 |
"quant_format": "exl3",
|
| 15174 |
-
"bits_per_weight":
|
| 15175 |
},
|
| 15176 |
"model.layers.61.post_attention_layernorm": {
|
| 15177 |
"stored_tensors": {
|
|
@@ -15204,14 +15206,14 @@
|
|
| 15204 |
"shape": [
|
| 15205 |
320,
|
| 15206 |
1728,
|
| 15207 |
-
|
| 15208 |
],
|
| 15209 |
-
"n_bytes":
|
| 15210 |
"dtype": "torch.int16"
|
| 15211 |
}
|
| 15212 |
},
|
| 15213 |
"quant_format": "exl3",
|
| 15214 |
-
"bits_per_weight":
|
| 15215 |
},
|
| 15216 |
"model.layers.61.mlp.gate_proj": {
|
| 15217 |
"stored_tensors": {
|
|
@@ -15309,14 +15311,14 @@
|
|
| 15309 |
"shape": [
|
| 15310 |
320,
|
| 15311 |
320,
|
| 15312 |
-
|
| 15313 |
],
|
| 15314 |
-
"n_bytes":
|
| 15315 |
"dtype": "torch.int16"
|
| 15316 |
}
|
| 15317 |
},
|
| 15318 |
"quant_format": "exl3",
|
| 15319 |
-
"bits_per_weight":
|
| 15320 |
},
|
| 15321 |
"model.layers.62.self_attn.k_proj": {
|
| 15322 |
"stored_tensors": {
|
|
@@ -15345,14 +15347,14 @@
|
|
| 15345 |
"shape": [
|
| 15346 |
320,
|
| 15347 |
64,
|
| 15348 |
-
|
| 15349 |
],
|
| 15350 |
-
"n_bytes":
|
| 15351 |
"dtype": "torch.int16"
|
| 15352 |
}
|
| 15353 |
},
|
| 15354 |
"quant_format": "exl3",
|
| 15355 |
-
"bits_per_weight":
|
| 15356 |
},
|
| 15357 |
"model.layers.62.self_attn.v_proj": {
|
| 15358 |
"stored_tensors": {
|
|
@@ -15381,14 +15383,14 @@
|
|
| 15381 |
"shape": [
|
| 15382 |
320,
|
| 15383 |
64,
|
| 15384 |
-
|
| 15385 |
],
|
| 15386 |
-
"n_bytes":
|
| 15387 |
"dtype": "torch.int16"
|
| 15388 |
}
|
| 15389 |
},
|
| 15390 |
"quant_format": "exl3",
|
| 15391 |
-
"bits_per_weight":
|
| 15392 |
},
|
| 15393 |
"model.layers.62.self_attn.o_proj": {
|
| 15394 |
"stored_tensors": {
|
|
@@ -15410,14 +15412,14 @@
|
|
| 15410 |
"shape": [
|
| 15411 |
320,
|
| 15412 |
320,
|
| 15413 |
-
|
| 15414 |
],
|
| 15415 |
-
"n_bytes":
|
| 15416 |
"dtype": "torch.int16"
|
| 15417 |
}
|
| 15418 |
},
|
| 15419 |
"quant_format": "exl3",
|
| 15420 |
-
"bits_per_weight":
|
| 15421 |
},
|
| 15422 |
"model.layers.62.post_attention_layernorm": {
|
| 15423 |
"stored_tensors": {
|
|
@@ -15450,14 +15452,14 @@
|
|
| 15450 |
"shape": [
|
| 15451 |
320,
|
| 15452 |
1728,
|
| 15453 |
-
|
| 15454 |
],
|
| 15455 |
-
"n_bytes":
|
| 15456 |
"dtype": "torch.int16"
|
| 15457 |
}
|
| 15458 |
},
|
| 15459 |
"quant_format": "exl3",
|
| 15460 |
-
"bits_per_weight":
|
| 15461 |
},
|
| 15462 |
"model.layers.62.mlp.gate_proj": {
|
| 15463 |
"stored_tensors": {
|
|
@@ -15555,14 +15557,14 @@
|
|
| 15555 |
"shape": [
|
| 15556 |
320,
|
| 15557 |
320,
|
| 15558 |
-
|
| 15559 |
],
|
| 15560 |
-
"n_bytes":
|
| 15561 |
"dtype": "torch.int16"
|
| 15562 |
}
|
| 15563 |
},
|
| 15564 |
"quant_format": "exl3",
|
| 15565 |
-
"bits_per_weight":
|
| 15566 |
},
|
| 15567 |
"model.layers.63.self_attn.k_proj": {
|
| 15568 |
"stored_tensors": {
|
|
@@ -15591,14 +15593,14 @@
|
|
| 15591 |
"shape": [
|
| 15592 |
320,
|
| 15593 |
64,
|
| 15594 |
-
|
| 15595 |
],
|
| 15596 |
-
"n_bytes":
|
| 15597 |
"dtype": "torch.int16"
|
| 15598 |
}
|
| 15599 |
},
|
| 15600 |
"quant_format": "exl3",
|
| 15601 |
-
"bits_per_weight":
|
| 15602 |
},
|
| 15603 |
"model.layers.63.self_attn.v_proj": {
|
| 15604 |
"stored_tensors": {
|
|
@@ -15627,14 +15629,14 @@
|
|
| 15627 |
"shape": [
|
| 15628 |
320,
|
| 15629 |
64,
|
| 15630 |
-
|
| 15631 |
],
|
| 15632 |
-
"n_bytes":
|
| 15633 |
"dtype": "torch.int16"
|
| 15634 |
}
|
| 15635 |
},
|
| 15636 |
"quant_format": "exl3",
|
| 15637 |
-
"bits_per_weight":
|
| 15638 |
},
|
| 15639 |
"model.layers.63.self_attn.o_proj": {
|
| 15640 |
"stored_tensors": {
|
|
@@ -15656,14 +15658,14 @@
|
|
| 15656 |
"shape": [
|
| 15657 |
320,
|
| 15658 |
320,
|
| 15659 |
-
|
| 15660 |
],
|
| 15661 |
-
"n_bytes":
|
| 15662 |
"dtype": "torch.int16"
|
| 15663 |
}
|
| 15664 |
},
|
| 15665 |
"quant_format": "exl3",
|
| 15666 |
-
"bits_per_weight":
|
| 15667 |
},
|
| 15668 |
"model.layers.63.post_attention_layernorm": {
|
| 15669 |
"stored_tensors": {
|
|
@@ -15696,14 +15698,14 @@
|
|
| 15696 |
"shape": [
|
| 15697 |
320,
|
| 15698 |
1728,
|
| 15699 |
-
|
| 15700 |
],
|
| 15701 |
-
"n_bytes":
|
| 15702 |
"dtype": "torch.int16"
|
| 15703 |
}
|
| 15704 |
},
|
| 15705 |
"quant_format": "exl3",
|
| 15706 |
-
"bits_per_weight":
|
| 15707 |
},
|
| 15708 |
"model.layers.63.mlp.gate_proj": {
|
| 15709 |
"stored_tensors": {
|
|
|
|
| 1 |
{
|
| 2 |
"quant_method": "exl3",
|
| 3 |
+
"version": "0.0.4",
|
| 4 |
"bits": 4.5,
|
| 5 |
+
"head_bits": 6,
|
| 6 |
"calibration": {
|
| 7 |
"rows": 100,
|
| 8 |
"cols": 2048
|
| 9 |
},
|
| 10 |
+
"out_scales": "auto",
|
| 11 |
"tensor_storage": {
|
| 12 |
"model.embed_tokens": {
|
| 13 |
"stored_tensors": {
|
|
|
|
| 587 |
"shape": [
|
| 588 |
320,
|
| 589 |
64,
|
| 590 |
+
80
|
| 591 |
],
|
| 592 |
+
"n_bytes": 3276800,
|
| 593 |
"dtype": "torch.int16"
|
| 594 |
}
|
| 595 |
},
|
| 596 |
"quant_format": "exl3",
|
| 597 |
+
"bits_per_weight": 5
|
| 598 |
},
|
| 599 |
"model.layers.2.self_attn.v_proj": {
|
| 600 |
"stored_tensors": {
|
|
|
|
| 623 |
"shape": [
|
| 624 |
320,
|
| 625 |
64,
|
| 626 |
+
80
|
| 627 |
],
|
| 628 |
+
"n_bytes": 3276800,
|
| 629 |
"dtype": "torch.int16"
|
| 630 |
}
|
| 631 |
},
|
| 632 |
"quant_format": "exl3",
|
| 633 |
+
"bits_per_weight": 5
|
| 634 |
},
|
| 635 |
"model.layers.2.self_attn.o_proj": {
|
| 636 |
"stored_tensors": {
|
|
|
|
| 1325 |
"shape": [
|
| 1326 |
320,
|
| 1327 |
64,
|
| 1328 |
+
96
|
| 1329 |
],
|
| 1330 |
+
"n_bytes": 3932160,
|
| 1331 |
"dtype": "torch.int16"
|
| 1332 |
}
|
| 1333 |
},
|
| 1334 |
"quant_format": "exl3",
|
| 1335 |
+
"bits_per_weight": 6
|
| 1336 |
},
|
| 1337 |
"model.layers.5.self_attn.v_proj": {
|
| 1338 |
"stored_tensors": {
|
|
|
|
| 1781 |
"shape": [
|
| 1782 |
320,
|
| 1783 |
320,
|
| 1784 |
+
80
|
| 1785 |
],
|
| 1786 |
+
"n_bytes": 16384000,
|
| 1787 |
"dtype": "torch.int16"
|
| 1788 |
}
|
| 1789 |
},
|
| 1790 |
"quant_format": "exl3",
|
| 1791 |
+
"bits_per_weight": 5
|
| 1792 |
},
|
| 1793 |
"model.layers.7.self_attn.k_proj": {
|
| 1794 |
"stored_tensors": {
|
|
|
|
| 1817 |
"shape": [
|
| 1818 |
320,
|
| 1819 |
64,
|
| 1820 |
+
96
|
| 1821 |
],
|
| 1822 |
+
"n_bytes": 3932160,
|
| 1823 |
"dtype": "torch.int16"
|
| 1824 |
}
|
| 1825 |
},
|
| 1826 |
"quant_format": "exl3",
|
| 1827 |
+
"bits_per_weight": 6
|
| 1828 |
},
|
| 1829 |
"model.layers.7.self_attn.v_proj": {
|
| 1830 |
"stored_tensors": {
|
|
|
|
| 1853 |
"shape": [
|
| 1854 |
320,
|
| 1855 |
64,
|
| 1856 |
+
96
|
| 1857 |
],
|
| 1858 |
+
"n_bytes": 3932160,
|
| 1859 |
"dtype": "torch.int16"
|
| 1860 |
}
|
| 1861 |
},
|
| 1862 |
"quant_format": "exl3",
|
| 1863 |
+
"bits_per_weight": 6
|
| 1864 |
},
|
| 1865 |
"model.layers.7.self_attn.o_proj": {
|
| 1866 |
"stored_tensors": {
|
|
|
|
| 1882 |
"shape": [
|
| 1883 |
320,
|
| 1884 |
320,
|
| 1885 |
+
80
|
| 1886 |
],
|
| 1887 |
+
"n_bytes": 16384000,
|
| 1888 |
"dtype": "torch.int16"
|
| 1889 |
}
|
| 1890 |
},
|
| 1891 |
"quant_format": "exl3",
|
| 1892 |
+
"bits_per_weight": 5
|
| 1893 |
},
|
| 1894 |
"model.layers.7.post_attention_layernorm": {
|
| 1895 |
"stored_tensors": {
|
|
|
|
| 1922 |
"shape": [
|
| 1923 |
320,
|
| 1924 |
1728,
|
| 1925 |
+
64
|
| 1926 |
],
|
| 1927 |
+
"n_bytes": 70778880,
|
| 1928 |
"dtype": "torch.int16"
|
| 1929 |
}
|
| 1930 |
},
|
| 1931 |
"quant_format": "exl3",
|
| 1932 |
+
"bits_per_weight": 4
|
| 1933 |
},
|
| 1934 |
"model.layers.7.mlp.gate_proj": {
|
| 1935 |
"stored_tensors": {
|
|
|
|
| 2027 |
"shape": [
|
| 2028 |
320,
|
| 2029 |
320,
|
| 2030 |
+
64
|
| 2031 |
],
|
| 2032 |
+
"n_bytes": 13107200,
|
| 2033 |
"dtype": "torch.int16"
|
| 2034 |
}
|
| 2035 |
},
|
| 2036 |
"quant_format": "exl3",
|
| 2037 |
+
"bits_per_weight": 4
|
| 2038 |
},
|
| 2039 |
"model.layers.8.self_attn.k_proj": {
|
| 2040 |
"stored_tensors": {
|
|
|
|
| 2128 |
"shape": [
|
| 2129 |
320,
|
| 2130 |
320,
|
| 2131 |
+
64
|
| 2132 |
],
|
| 2133 |
+
"n_bytes": 13107200,
|
| 2134 |
"dtype": "torch.int16"
|
| 2135 |
}
|
| 2136 |
},
|
| 2137 |
"quant_format": "exl3",
|
| 2138 |
+
"bits_per_weight": 4
|
| 2139 |
},
|
| 2140 |
"model.layers.8.post_attention_layernorm": {
|
| 2141 |
"stored_tensors": {
|
|
|
|
| 2168 |
"shape": [
|
| 2169 |
320,
|
| 2170 |
1728,
|
| 2171 |
+
80
|
| 2172 |
],
|
| 2173 |
+
"n_bytes": 88473600,
|
| 2174 |
"dtype": "torch.int16"
|
| 2175 |
}
|
| 2176 |
},
|
| 2177 |
"quant_format": "exl3",
|
| 2178 |
+
"bits_per_weight": 5
|
| 2179 |
},
|
| 2180 |
"model.layers.8.mlp.gate_proj": {
|
| 2181 |
"stored_tensors": {
|
|
|
|
| 2519 |
"shape": [
|
| 2520 |
320,
|
| 2521 |
320,
|
| 2522 |
+
80
|
| 2523 |
],
|
| 2524 |
+
"n_bytes": 16384000,
|
| 2525 |
"dtype": "torch.int16"
|
| 2526 |
}
|
| 2527 |
},
|
| 2528 |
"quant_format": "exl3",
|
| 2529 |
+
"bits_per_weight": 5
|
| 2530 |
},
|
| 2531 |
"model.layers.10.self_attn.k_proj": {
|
| 2532 |
"stored_tensors": {
|
|
|
|
| 2555 |
"shape": [
|
| 2556 |
320,
|
| 2557 |
64,
|
| 2558 |
+
96
|
| 2559 |
],
|
| 2560 |
+
"n_bytes": 3932160,
|
| 2561 |
"dtype": "torch.int16"
|
| 2562 |
}
|
| 2563 |
},
|
| 2564 |
"quant_format": "exl3",
|
| 2565 |
+
"bits_per_weight": 6
|
| 2566 |
},
|
| 2567 |
"model.layers.10.self_attn.v_proj": {
|
| 2568 |
"stored_tensors": {
|
|
|
|
| 2620 |
"shape": [
|
| 2621 |
320,
|
| 2622 |
320,
|
| 2623 |
+
80
|
| 2624 |
],
|
| 2625 |
+
"n_bytes": 16384000,
|
| 2626 |
"dtype": "torch.int16"
|
| 2627 |
}
|
| 2628 |
},
|
| 2629 |
"quant_format": "exl3",
|
| 2630 |
+
"bits_per_weight": 5
|
| 2631 |
},
|
| 2632 |
"model.layers.10.post_attention_layernorm": {
|
| 2633 |
"stored_tensors": {
|
|
|
|
| 2660 |
"shape": [
|
| 2661 |
320,
|
| 2662 |
1728,
|
| 2663 |
+
64
|
| 2664 |
],
|
| 2665 |
+
"n_bytes": 70778880,
|
| 2666 |
"dtype": "torch.int16"
|
| 2667 |
}
|
| 2668 |
},
|
| 2669 |
"quant_format": "exl3",
|
| 2670 |
+
"bits_per_weight": 4
|
| 2671 |
},
|
| 2672 |
"model.layers.10.mlp.gate_proj": {
|
| 2673 |
"stored_tensors": {
|
|
|
|
| 2765 |
"shape": [
|
| 2766 |
320,
|
| 2767 |
320,
|
| 2768 |
+
64
|
| 2769 |
],
|
| 2770 |
+
"n_bytes": 13107200,
|
| 2771 |
"dtype": "torch.int16"
|
| 2772 |
}
|
| 2773 |
},
|
| 2774 |
"quant_format": "exl3",
|
| 2775 |
+
"bits_per_weight": 4
|
| 2776 |
},
|
| 2777 |
"model.layers.11.self_attn.k_proj": {
|
| 2778 |
"stored_tensors": {
|
|
|
|
| 2866 |
"shape": [
|
| 2867 |
320,
|
| 2868 |
320,
|
| 2869 |
+
64
|
| 2870 |
],
|
| 2871 |
+
"n_bytes": 13107200,
|
| 2872 |
"dtype": "torch.int16"
|
| 2873 |
}
|
| 2874 |
},
|
| 2875 |
"quant_format": "exl3",
|
| 2876 |
+
"bits_per_weight": 4
|
| 2877 |
},
|
| 2878 |
"model.layers.11.post_attention_layernorm": {
|
| 2879 |
"stored_tensors": {
|
|
|
|
| 2906 |
"shape": [
|
| 2907 |
320,
|
| 2908 |
1728,
|
| 2909 |
+
80
|
| 2910 |
],
|
| 2911 |
+
"n_bytes": 88473600,
|
| 2912 |
"dtype": "torch.int16"
|
| 2913 |
}
|
| 2914 |
},
|
| 2915 |
"quant_format": "exl3",
|
| 2916 |
+
"bits_per_weight": 5
|
| 2917 |
},
|
| 2918 |
"model.layers.11.mlp.gate_proj": {
|
| 2919 |
"stored_tensors": {
|
|
|
|
| 3257 |
"shape": [
|
| 3258 |
320,
|
| 3259 |
320,
|
| 3260 |
+
80
|
| 3261 |
],
|
| 3262 |
+
"n_bytes": 16384000,
|
| 3263 |
"dtype": "torch.int16"
|
| 3264 |
}
|
| 3265 |
},
|
| 3266 |
"quant_format": "exl3",
|
| 3267 |
+
"bits_per_weight": 5
|
| 3268 |
},
|
| 3269 |
"model.layers.13.self_attn.k_proj": {
|
| 3270 |
"stored_tensors": {
|
|
|
|
| 3293 |
"shape": [
|
| 3294 |
320,
|
| 3295 |
64,
|
| 3296 |
+
96
|
| 3297 |
],
|
| 3298 |
+
"n_bytes": 3932160,
|
| 3299 |
"dtype": "torch.int16"
|
| 3300 |
}
|
| 3301 |
},
|
| 3302 |
"quant_format": "exl3",
|
| 3303 |
+
"bits_per_weight": 6
|
| 3304 |
},
|
| 3305 |
"model.layers.13.self_attn.v_proj": {
|
| 3306 |
"stored_tensors": {
|
|
|
|
| 3358 |
"shape": [
|
| 3359 |
320,
|
| 3360 |
320,
|
| 3361 |
+
80
|
| 3362 |
],
|
| 3363 |
+
"n_bytes": 16384000,
|
| 3364 |
"dtype": "torch.int16"
|
| 3365 |
}
|
| 3366 |
},
|
| 3367 |
"quant_format": "exl3",
|
| 3368 |
+
"bits_per_weight": 5
|
| 3369 |
},
|
| 3370 |
"model.layers.13.post_attention_layernorm": {
|
| 3371 |
"stored_tensors": {
|
|
|
|
| 3398 |
"shape": [
|
| 3399 |
320,
|
| 3400 |
1728,
|
| 3401 |
+
64
|
| 3402 |
],
|
| 3403 |
+
"n_bytes": 70778880,
|
| 3404 |
"dtype": "torch.int16"
|
| 3405 |
}
|
| 3406 |
},
|
| 3407 |
"quant_format": "exl3",
|
| 3408 |
+
"bits_per_weight": 4
|
| 3409 |
},
|
| 3410 |
"model.layers.13.mlp.gate_proj": {
|
| 3411 |
"stored_tensors": {
|
|
|
|
| 3503 |
"shape": [
|
| 3504 |
320,
|
| 3505 |
320,
|
| 3506 |
+
64
|
| 3507 |
],
|
| 3508 |
+
"n_bytes": 13107200,
|
| 3509 |
"dtype": "torch.int16"
|
| 3510 |
}
|
| 3511 |
},
|
| 3512 |
"quant_format": "exl3",
|
| 3513 |
+
"bits_per_weight": 4
|
| 3514 |
},
|
| 3515 |
"model.layers.14.self_attn.k_proj": {
|
| 3516 |
"stored_tensors": {
|
|
|
|
| 3539 |
"shape": [
|
| 3540 |
320,
|
| 3541 |
64,
|
| 3542 |
+
80
|
| 3543 |
],
|
| 3544 |
+
"n_bytes": 3276800,
|
| 3545 |
"dtype": "torch.int16"
|
| 3546 |
}
|
| 3547 |
},
|
| 3548 |
"quant_format": "exl3",
|
| 3549 |
+
"bits_per_weight": 5
|
| 3550 |
},
|
| 3551 |
"model.layers.14.self_attn.v_proj": {
|
| 3552 |
"stored_tensors": {
|
|
|
|
| 3575 |
"shape": [
|
| 3576 |
320,
|
| 3577 |
64,
|
| 3578 |
+
80
|
| 3579 |
],
|
| 3580 |
+
"n_bytes": 3276800,
|
| 3581 |
"dtype": "torch.int16"
|
| 3582 |
}
|
| 3583 |
},
|
| 3584 |
"quant_format": "exl3",
|
| 3585 |
+
"bits_per_weight": 5
|
| 3586 |
},
|
| 3587 |
"model.layers.14.self_attn.o_proj": {
|
| 3588 |
"stored_tensors": {
|
|
|
|
| 3604 |
"shape": [
|
| 3605 |
320,
|
| 3606 |
320,
|
| 3607 |
+
64
|
| 3608 |
],
|
| 3609 |
+
"n_bytes": 13107200,
|
| 3610 |
"dtype": "torch.int16"
|
| 3611 |
}
|
| 3612 |
},
|
| 3613 |
"quant_format": "exl3",
|
| 3614 |
+
"bits_per_weight": 4
|
| 3615 |
},
|
| 3616 |
"model.layers.14.post_attention_layernorm": {
|
| 3617 |
"stored_tensors": {
|
|
|
|
| 3644 |
"shape": [
|
| 3645 |
320,
|
| 3646 |
1728,
|
| 3647 |
+
80
|
| 3648 |
],
|
| 3649 |
+
"n_bytes": 88473600,
|
| 3650 |
"dtype": "torch.int16"
|
| 3651 |
}
|
| 3652 |
},
|
| 3653 |
"quant_format": "exl3",
|
| 3654 |
+
"bits_per_weight": 5
|
| 3655 |
},
|
| 3656 |
"model.layers.14.mlp.gate_proj": {
|
| 3657 |
"stored_tensors": {
|
|
|
|
| 3749 |
"shape": [
|
| 3750 |
320,
|
| 3751 |
320,
|
| 3752 |
+
80
|
| 3753 |
],
|
| 3754 |
+
"n_bytes": 16384000,
|
| 3755 |
"dtype": "torch.int16"
|
| 3756 |
}
|
| 3757 |
},
|
| 3758 |
"quant_format": "exl3",
|
| 3759 |
+
"bits_per_weight": 5
|
| 3760 |
},
|
| 3761 |
"model.layers.15.self_attn.k_proj": {
|
| 3762 |
"stored_tensors": {
|
|
|
|
| 3785 |
"shape": [
|
| 3786 |
320,
|
| 3787 |
64,
|
| 3788 |
+
96
|
| 3789 |
],
|
| 3790 |
+
"n_bytes": 3932160,
|
| 3791 |
"dtype": "torch.int16"
|
| 3792 |
}
|
| 3793 |
},
|
| 3794 |
"quant_format": "exl3",
|
| 3795 |
+
"bits_per_weight": 6
|
| 3796 |
},
|
| 3797 |
"model.layers.15.self_attn.v_proj": {
|
| 3798 |
"stored_tensors": {
|
|
|
|
| 3821 |
"shape": [
|
| 3822 |
320,
|
| 3823 |
64,
|
| 3824 |
+
96
|
| 3825 |
],
|
| 3826 |
+
"n_bytes": 3932160,
|
| 3827 |
"dtype": "torch.int16"
|
| 3828 |
}
|
| 3829 |
},
|
| 3830 |
"quant_format": "exl3",
|
| 3831 |
+
"bits_per_weight": 6
|
| 3832 |
},
|
| 3833 |
"model.layers.15.self_attn.o_proj": {
|
| 3834 |
"stored_tensors": {
|
|
|
|
| 3850 |
"shape": [
|
| 3851 |
320,
|
| 3852 |
320,
|
| 3853 |
+
80
|
| 3854 |
],
|
| 3855 |
+
"n_bytes": 16384000,
|
| 3856 |
"dtype": "torch.int16"
|
| 3857 |
}
|
| 3858 |
},
|
| 3859 |
"quant_format": "exl3",
|
| 3860 |
+
"bits_per_weight": 5
|
| 3861 |
},
|
| 3862 |
"model.layers.15.post_attention_layernorm": {
|
| 3863 |
"stored_tensors": {
|
|
|
|
| 3890 |
"shape": [
|
| 3891 |
320,
|
| 3892 |
1728,
|
| 3893 |
+
64
|
| 3894 |
],
|
| 3895 |
+
"n_bytes": 70778880,
|
| 3896 |
"dtype": "torch.int16"
|
| 3897 |
}
|
| 3898 |
},
|
| 3899 |
"quant_format": "exl3",
|
| 3900 |
+
"bits_per_weight": 4
|
| 3901 |
},
|
| 3902 |
"model.layers.15.mlp.gate_proj": {
|
| 3903 |
"stored_tensors": {
|
|
|
|
| 4241 |
"shape": [
|
| 4242 |
320,
|
| 4243 |
320,
|
| 4244 |
+
64
|
| 4245 |
],
|
| 4246 |
+
"n_bytes": 13107200,
|
| 4247 |
"dtype": "torch.int16"
|
| 4248 |
}
|
| 4249 |
},
|
| 4250 |
"quant_format": "exl3",
|
| 4251 |
+
"bits_per_weight": 4
|
| 4252 |
},
|
| 4253 |
"model.layers.17.self_attn.k_proj": {
|
| 4254 |
"stored_tensors": {
|
|
|
|
| 4342 |
"shape": [
|
| 4343 |
320,
|
| 4344 |
320,
|
| 4345 |
+
64
|
| 4346 |
],
|
| 4347 |
+
"n_bytes": 13107200,
|
| 4348 |
"dtype": "torch.int16"
|
| 4349 |
}
|
| 4350 |
},
|
| 4351 |
"quant_format": "exl3",
|
| 4352 |
+
"bits_per_weight": 4
|
| 4353 |
},
|
| 4354 |
"model.layers.17.post_attention_layernorm": {
|
| 4355 |
"stored_tensors": {
|
|
|
|
| 4382 |
"shape": [
|
| 4383 |
320,
|
| 4384 |
1728,
|
| 4385 |
+
80
|
| 4386 |
],
|
| 4387 |
+
"n_bytes": 88473600,
|
| 4388 |
"dtype": "torch.int16"
|
| 4389 |
}
|
| 4390 |
},
|
| 4391 |
"quant_format": "exl3",
|
| 4392 |
+
"bits_per_weight": 5
|
| 4393 |
},
|
| 4394 |
"model.layers.17.mlp.gate_proj": {
|
| 4395 |
"stored_tensors": {
|
|
|
|
| 4487 |
"shape": [
|
| 4488 |
320,
|
| 4489 |
320,
|
| 4490 |
+
80
|
| 4491 |
],
|
| 4492 |
+
"n_bytes": 16384000,
|
| 4493 |
"dtype": "torch.int16"
|
| 4494 |
}
|
| 4495 |
},
|
| 4496 |
"quant_format": "exl3",
|
| 4497 |
+
"bits_per_weight": 5
|
| 4498 |
},
|
| 4499 |
"model.layers.18.self_attn.k_proj": {
|
| 4500 |
"stored_tensors": {
|
|
|
|
| 4523 |
"shape": [
|
| 4524 |
320,
|
| 4525 |
64,
|
| 4526 |
+
96
|
| 4527 |
],
|
| 4528 |
+
"n_bytes": 3932160,
|
| 4529 |
"dtype": "torch.int16"
|
| 4530 |
}
|
| 4531 |
},
|
| 4532 |
"quant_format": "exl3",
|
| 4533 |
+
"bits_per_weight": 6
|
| 4534 |
},
|
| 4535 |
"model.layers.18.self_attn.v_proj": {
|
| 4536 |
"stored_tensors": {
|
|
|
|
| 4588 |
"shape": [
|
| 4589 |
320,
|
| 4590 |
320,
|
| 4591 |
+
80
|
| 4592 |
],
|
| 4593 |
+
"n_bytes": 16384000,
|
| 4594 |
"dtype": "torch.int16"
|
| 4595 |
}
|
| 4596 |
},
|
| 4597 |
"quant_format": "exl3",
|
| 4598 |
+
"bits_per_weight": 5
|
| 4599 |
},
|
| 4600 |
"model.layers.18.post_attention_layernorm": {
|
| 4601 |
"stored_tensors": {
|
|
|
|
| 4628 |
"shape": [
|
| 4629 |
320,
|
| 4630 |
1728,
|
| 4631 |
+
64
|
| 4632 |
],
|
| 4633 |
+
"n_bytes": 70778880,
|
| 4634 |
"dtype": "torch.int16"
|
| 4635 |
}
|
| 4636 |
},
|
| 4637 |
"quant_format": "exl3",
|
| 4638 |
+
"bits_per_weight": 4
|
| 4639 |
},
|
| 4640 |
"model.layers.18.mlp.gate_proj": {
|
| 4641 |
"stored_tensors": {
|
|
|
|
| 4979 |
"shape": [
|
| 4980 |
320,
|
| 4981 |
320,
|
| 4982 |
+
64
|
| 4983 |
],
|
| 4984 |
+
"n_bytes": 13107200,
|
| 4985 |
"dtype": "torch.int16"
|
| 4986 |
}
|
| 4987 |
},
|
| 4988 |
"quant_format": "exl3",
|
| 4989 |
+
"bits_per_weight": 4
|
| 4990 |
},
|
| 4991 |
"model.layers.20.self_attn.k_proj": {
|
| 4992 |
"stored_tensors": {
|
|
|
|
| 5080 |
"shape": [
|
| 5081 |
320,
|
| 5082 |
320,
|
| 5083 |
+
64
|
| 5084 |
],
|
| 5085 |
+
"n_bytes": 13107200,
|
| 5086 |
"dtype": "torch.int16"
|
| 5087 |
}
|
| 5088 |
},
|
| 5089 |
"quant_format": "exl3",
|
| 5090 |
+
"bits_per_weight": 4
|
| 5091 |
},
|
| 5092 |
"model.layers.20.post_attention_layernorm": {
|
| 5093 |
"stored_tensors": {
|
|
|
|
| 5120 |
"shape": [
|
| 5121 |
320,
|
| 5122 |
1728,
|
| 5123 |
+
80
|
| 5124 |
],
|
| 5125 |
+
"n_bytes": 88473600,
|
| 5126 |
"dtype": "torch.int16"
|
| 5127 |
}
|
| 5128 |
},
|
| 5129 |
"quant_format": "exl3",
|
| 5130 |
+
"bits_per_weight": 5
|
| 5131 |
},
|
| 5132 |
"model.layers.20.mlp.gate_proj": {
|
| 5133 |
"stored_tensors": {
|
|
|
|
| 5225 |
"shape": [
|
| 5226 |
320,
|
| 5227 |
320,
|
| 5228 |
+
80
|
| 5229 |
],
|
| 5230 |
+
"n_bytes": 16384000,
|
| 5231 |
"dtype": "torch.int16"
|
| 5232 |
}
|
| 5233 |
},
|
| 5234 |
"quant_format": "exl3",
|
| 5235 |
+
"bits_per_weight": 5
|
| 5236 |
},
|
| 5237 |
"model.layers.21.self_attn.k_proj": {
|
| 5238 |
"stored_tensors": {
|
|
|
|
| 5261 |
"shape": [
|
| 5262 |
320,
|
| 5263 |
64,
|
| 5264 |
+
96
|
| 5265 |
],
|
| 5266 |
+
"n_bytes": 3932160,
|
| 5267 |
"dtype": "torch.int16"
|
| 5268 |
}
|
| 5269 |
},
|
| 5270 |
"quant_format": "exl3",
|
| 5271 |
+
"bits_per_weight": 6
|
| 5272 |
},
|
| 5273 |
"model.layers.21.self_attn.v_proj": {
|
| 5274 |
"stored_tensors": {
|
|
|
|
| 5326 |
"shape": [
|
| 5327 |
320,
|
| 5328 |
320,
|
| 5329 |
+
80
|
| 5330 |
],
|
| 5331 |
+
"n_bytes": 16384000,
|
| 5332 |
"dtype": "torch.int16"
|
| 5333 |
}
|
| 5334 |
},
|
| 5335 |
"quant_format": "exl3",
|
| 5336 |
+
"bits_per_weight": 5
|
| 5337 |
},
|
| 5338 |
"model.layers.21.post_attention_layernorm": {
|
| 5339 |
"stored_tensors": {
|
|
|
|
| 5366 |
"shape": [
|
| 5367 |
320,
|
| 5368 |
1728,
|
| 5369 |
+
64
|
| 5370 |
],
|
| 5371 |
+
"n_bytes": 70778880,
|
| 5372 |
"dtype": "torch.int16"
|
| 5373 |
}
|
| 5374 |
},
|
| 5375 |
"quant_format": "exl3",
|
| 5376 |
+
"bits_per_weight": 4
|
| 5377 |
},
|
| 5378 |
"model.layers.21.mlp.gate_proj": {
|
| 5379 |
"stored_tensors": {
|
|
|
|
| 5753 |
"shape": [
|
| 5754 |
320,
|
| 5755 |
64,
|
| 5756 |
+
96
|
| 5757 |
],
|
| 5758 |
+
"n_bytes": 3932160,
|
| 5759 |
"dtype": "torch.int16"
|
| 5760 |
}
|
| 5761 |
},
|
| 5762 |
"quant_format": "exl3",
|
| 5763 |
+
"bits_per_weight": 6
|
| 5764 |
},
|
| 5765 |
"model.layers.23.self_attn.v_proj": {
|
| 5766 |
"stored_tensors": {
|
|
|
|
| 5789 |
"shape": [
|
| 5790 |
320,
|
| 5791 |
64,
|
| 5792 |
+
96
|
| 5793 |
],
|
| 5794 |
+
"n_bytes": 3932160,
|
| 5795 |
"dtype": "torch.int16"
|
| 5796 |
}
|
| 5797 |
},
|
| 5798 |
"quant_format": "exl3",
|
| 5799 |
+
"bits_per_weight": 6
|
| 5800 |
},
|
| 5801 |
"model.layers.23.self_attn.o_proj": {
|
| 5802 |
"stored_tensors": {
|
|
|
|
| 6491 |
"shape": [
|
| 6492 |
320,
|
| 6493 |
64,
|
| 6494 |
+
80
|
| 6495 |
],
|
| 6496 |
+
"n_bytes": 3276800,
|
| 6497 |
"dtype": "torch.int16"
|
| 6498 |
}
|
| 6499 |
},
|
| 6500 |
"quant_format": "exl3",
|
| 6501 |
+
"bits_per_weight": 5
|
| 6502 |
},
|
| 6503 |
"model.layers.26.self_attn.v_proj": {
|
| 6504 |
"stored_tensors": {
|
|
|
|
| 6527 |
"shape": [
|
| 6528 |
320,
|
| 6529 |
64,
|
| 6530 |
+
80
|
| 6531 |
],
|
| 6532 |
+
"n_bytes": 3276800,
|
| 6533 |
"dtype": "torch.int16"
|
| 6534 |
}
|
| 6535 |
},
|
| 6536 |
"quant_format": "exl3",
|
| 6537 |
+
"bits_per_weight": 5
|
| 6538 |
},
|
| 6539 |
"model.layers.26.self_attn.o_proj": {
|
| 6540 |
"stored_tensors": {
|
|
|
|
| 7229 |
"shape": [
|
| 7230 |
320,
|
| 7231 |
64,
|
| 7232 |
+
96
|
| 7233 |
],
|
| 7234 |
+
"n_bytes": 3932160,
|
| 7235 |
"dtype": "torch.int16"
|
| 7236 |
}
|
| 7237 |
},
|
| 7238 |
"quant_format": "exl3",
|
| 7239 |
+
"bits_per_weight": 6
|
| 7240 |
},
|
| 7241 |
"model.layers.29.self_attn.v_proj": {
|
| 7242 |
"stored_tensors": {
|
|
|
|
| 7685 |
"shape": [
|
| 7686 |
320,
|
| 7687 |
320,
|
| 7688 |
+
80
|
| 7689 |
],
|
| 7690 |
+
"n_bytes": 16384000,
|
| 7691 |
"dtype": "torch.int16"
|
| 7692 |
}
|
| 7693 |
},
|
| 7694 |
"quant_format": "exl3",
|
| 7695 |
+
"bits_per_weight": 5
|
| 7696 |
},
|
| 7697 |
"model.layers.31.self_attn.k_proj": {
|
| 7698 |
"stored_tensors": {
|
|
|
|
| 7721 |
"shape": [
|
| 7722 |
320,
|
| 7723 |
64,
|
| 7724 |
+
96
|
| 7725 |
],
|
| 7726 |
+
"n_bytes": 3932160,
|
| 7727 |
"dtype": "torch.int16"
|
| 7728 |
}
|
| 7729 |
},
|
| 7730 |
"quant_format": "exl3",
|
| 7731 |
+
"bits_per_weight": 6
|
| 7732 |
},
|
| 7733 |
"model.layers.31.self_attn.v_proj": {
|
| 7734 |
"stored_tensors": {
|
|
|
|
| 7757 |
"shape": [
|
| 7758 |
320,
|
| 7759 |
64,
|
| 7760 |
+
96
|
| 7761 |
],
|
| 7762 |
+
"n_bytes": 3932160,
|
| 7763 |
"dtype": "torch.int16"
|
| 7764 |
}
|
| 7765 |
},
|
| 7766 |
"quant_format": "exl3",
|
| 7767 |
+
"bits_per_weight": 6
|
| 7768 |
},
|
| 7769 |
"model.layers.31.self_attn.o_proj": {
|
| 7770 |
"stored_tensors": {
|
|
|
|
| 7786 |
"shape": [
|
| 7787 |
320,
|
| 7788 |
320,
|
| 7789 |
+
80
|
| 7790 |
],
|
| 7791 |
+
"n_bytes": 16384000,
|
| 7792 |
"dtype": "torch.int16"
|
| 7793 |
}
|
| 7794 |
},
|
| 7795 |
"quant_format": "exl3",
|
| 7796 |
+
"bits_per_weight": 5
|
| 7797 |
},
|
| 7798 |
"model.layers.31.post_attention_layernorm": {
|
| 7799 |
"stored_tensors": {
|
|
|
|
| 7826 |
"shape": [
|
| 7827 |
320,
|
| 7828 |
1728,
|
| 7829 |
+
64
|
| 7830 |
],
|
| 7831 |
+
"n_bytes": 70778880,
|
| 7832 |
"dtype": "torch.int16"
|
| 7833 |
}
|
| 7834 |
},
|
| 7835 |
"quant_format": "exl3",
|
| 7836 |
+
"bits_per_weight": 4
|
| 7837 |
},
|
| 7838 |
"model.layers.31.mlp.gate_proj": {
|
| 7839 |
"stored_tensors": {
|
|
|
|
| 7931 |
"shape": [
|
| 7932 |
320,
|
| 7933 |
320,
|
| 7934 |
+
64
|
| 7935 |
],
|
| 7936 |
+
"n_bytes": 13107200,
|
| 7937 |
"dtype": "torch.int16"
|
| 7938 |
}
|
| 7939 |
},
|
| 7940 |
"quant_format": "exl3",
|
| 7941 |
+
"bits_per_weight": 4
|
| 7942 |
},
|
| 7943 |
"model.layers.32.self_attn.k_proj": {
|
| 7944 |
"stored_tensors": {
|
|
|
|
| 8032 |
"shape": [
|
| 8033 |
320,
|
| 8034 |
320,
|
| 8035 |
+
64
|
| 8036 |
],
|
| 8037 |
+
"n_bytes": 13107200,
|
| 8038 |
"dtype": "torch.int16"
|
| 8039 |
}
|
| 8040 |
},
|
| 8041 |
"quant_format": "exl3",
|
| 8042 |
+
"bits_per_weight": 4
|
| 8043 |
},
|
| 8044 |
"model.layers.32.post_attention_layernorm": {
|
| 8045 |
"stored_tensors": {
|
|
|
|
| 8072 |
"shape": [
|
| 8073 |
320,
|
| 8074 |
1728,
|
| 8075 |
+
80
|
| 8076 |
],
|
| 8077 |
+
"n_bytes": 88473600,
|
| 8078 |
"dtype": "torch.int16"
|
| 8079 |
}
|
| 8080 |
},
|
| 8081 |
"quant_format": "exl3",
|
| 8082 |
+
"bits_per_weight": 5
|
| 8083 |
},
|
| 8084 |
"model.layers.32.mlp.gate_proj": {
|
| 8085 |
"stored_tensors": {
|
|
|
|
| 8423 |
"shape": [
|
| 8424 |
320,
|
| 8425 |
320,
|
| 8426 |
+
80
|
| 8427 |
],
|
| 8428 |
+
"n_bytes": 16384000,
|
| 8429 |
"dtype": "torch.int16"
|
| 8430 |
}
|
| 8431 |
},
|
| 8432 |
"quant_format": "exl3",
|
| 8433 |
+
"bits_per_weight": 5
|
| 8434 |
},
|
| 8435 |
"model.layers.34.self_attn.k_proj": {
|
| 8436 |
"stored_tensors": {
|
|
|
|
| 8459 |
"shape": [
|
| 8460 |
320,
|
| 8461 |
64,
|
| 8462 |
+
96
|
| 8463 |
],
|
| 8464 |
+
"n_bytes": 3932160,
|
| 8465 |
"dtype": "torch.int16"
|
| 8466 |
}
|
| 8467 |
},
|
| 8468 |
"quant_format": "exl3",
|
| 8469 |
+
"bits_per_weight": 6
|
| 8470 |
},
|
| 8471 |
"model.layers.34.self_attn.v_proj": {
|
| 8472 |
"stored_tensors": {
|
|
|
|
| 8524 |
"shape": [
|
| 8525 |
320,
|
| 8526 |
320,
|
| 8527 |
+
80
|
| 8528 |
],
|
| 8529 |
+
"n_bytes": 16384000,
|
| 8530 |
"dtype": "torch.int16"
|
| 8531 |
}
|
| 8532 |
},
|
| 8533 |
"quant_format": "exl3",
|
| 8534 |
+
"bits_per_weight": 5
|
| 8535 |
},
|
| 8536 |
"model.layers.34.post_attention_layernorm": {
|
| 8537 |
"stored_tensors": {
|
|
|
|
| 8564 |
"shape": [
|
| 8565 |
320,
|
| 8566 |
1728,
|
| 8567 |
+
64
|
| 8568 |
],
|
| 8569 |
+
"n_bytes": 70778880,
|
| 8570 |
"dtype": "torch.int16"
|
| 8571 |
}
|
| 8572 |
},
|
| 8573 |
"quant_format": "exl3",
|
| 8574 |
+
"bits_per_weight": 4
|
| 8575 |
},
|
| 8576 |
"model.layers.34.mlp.gate_proj": {
|
| 8577 |
"stored_tensors": {
|
|
|
|
| 8669 |
"shape": [
|
| 8670 |
320,
|
| 8671 |
320,
|
| 8672 |
+
64
|
| 8673 |
],
|
| 8674 |
+
"n_bytes": 13107200,
|
| 8675 |
"dtype": "torch.int16"
|
| 8676 |
}
|
| 8677 |
},
|
| 8678 |
"quant_format": "exl3",
|
| 8679 |
+
"bits_per_weight": 4
|
| 8680 |
},
|
| 8681 |
"model.layers.35.self_attn.k_proj": {
|
| 8682 |
"stored_tensors": {
|
|
|
|
| 8770 |
"shape": [
|
| 8771 |
320,
|
| 8772 |
320,
|
| 8773 |
+
64
|
| 8774 |
],
|
| 8775 |
+
"n_bytes": 13107200,
|
| 8776 |
"dtype": "torch.int16"
|
| 8777 |
}
|
| 8778 |
},
|
| 8779 |
"quant_format": "exl3",
|
| 8780 |
+
"bits_per_weight": 4
|
| 8781 |
},
|
| 8782 |
"model.layers.35.post_attention_layernorm": {
|
| 8783 |
"stored_tensors": {
|
|
|
|
| 8810 |
"shape": [
|
| 8811 |
320,
|
| 8812 |
1728,
|
| 8813 |
+
80
|
| 8814 |
],
|
| 8815 |
+
"n_bytes": 88473600,
|
| 8816 |
"dtype": "torch.int16"
|
| 8817 |
}
|
| 8818 |
},
|
| 8819 |
"quant_format": "exl3",
|
| 8820 |
+
"bits_per_weight": 5
|
| 8821 |
},
|
| 8822 |
"model.layers.35.mlp.gate_proj": {
|
| 8823 |
"stored_tensors": {
|
|
|
|
| 9161 |
"shape": [
|
| 9162 |
320,
|
| 9163 |
320,
|
| 9164 |
+
80
|
| 9165 |
],
|
| 9166 |
+
"n_bytes": 16384000,
|
| 9167 |
"dtype": "torch.int16"
|
| 9168 |
}
|
| 9169 |
},
|
| 9170 |
"quant_format": "exl3",
|
| 9171 |
+
"bits_per_weight": 5
|
| 9172 |
},
|
| 9173 |
"model.layers.37.self_attn.k_proj": {
|
| 9174 |
"stored_tensors": {
|
|
|
|
| 9197 |
"shape": [
|
| 9198 |
320,
|
| 9199 |
64,
|
| 9200 |
+
96
|
| 9201 |
],
|
| 9202 |
+
"n_bytes": 3932160,
|
| 9203 |
"dtype": "torch.int16"
|
| 9204 |
}
|
| 9205 |
},
|
| 9206 |
"quant_format": "exl3",
|
| 9207 |
+
"bits_per_weight": 6
|
| 9208 |
},
|
| 9209 |
"model.layers.37.self_attn.v_proj": {
|
| 9210 |
"stored_tensors": {
|
|
|
|
| 9262 |
"shape": [
|
| 9263 |
320,
|
| 9264 |
320,
|
| 9265 |
+
80
|
| 9266 |
],
|
| 9267 |
+
"n_bytes": 16384000,
|
| 9268 |
"dtype": "torch.int16"
|
| 9269 |
}
|
| 9270 |
},
|
| 9271 |
"quant_format": "exl3",
|
| 9272 |
+
"bits_per_weight": 5
|
| 9273 |
},
|
| 9274 |
"model.layers.37.post_attention_layernorm": {
|
| 9275 |
"stored_tensors": {
|
|
|
|
| 9302 |
"shape": [
|
| 9303 |
320,
|
| 9304 |
1728,
|
| 9305 |
+
64
|
| 9306 |
],
|
| 9307 |
+
"n_bytes": 70778880,
|
| 9308 |
"dtype": "torch.int16"
|
| 9309 |
}
|
| 9310 |
},
|
| 9311 |
"quant_format": "exl3",
|
| 9312 |
+
"bits_per_weight": 4
|
| 9313 |
},
|
| 9314 |
"model.layers.37.mlp.gate_proj": {
|
| 9315 |
"stored_tensors": {
|
|
|
|
| 9407 |
"shape": [
|
| 9408 |
320,
|
| 9409 |
320,
|
| 9410 |
+
64
|
| 9411 |
],
|
| 9412 |
+
"n_bytes": 13107200,
|
| 9413 |
"dtype": "torch.int16"
|
| 9414 |
}
|
| 9415 |
},
|
| 9416 |
"quant_format": "exl3",
|
| 9417 |
+
"bits_per_weight": 4
|
| 9418 |
},
|
| 9419 |
"model.layers.38.self_attn.k_proj": {
|
| 9420 |
"stored_tensors": {
|
|
|
|
| 9443 |
"shape": [
|
| 9444 |
320,
|
| 9445 |
64,
|
| 9446 |
+
80
|
| 9447 |
],
|
| 9448 |
+
"n_bytes": 3276800,
|
| 9449 |
"dtype": "torch.int16"
|
| 9450 |
}
|
| 9451 |
},
|
| 9452 |
"quant_format": "exl3",
|
| 9453 |
+
"bits_per_weight": 5
|
| 9454 |
},
|
| 9455 |
"model.layers.38.self_attn.v_proj": {
|
| 9456 |
"stored_tensors": {
|
|
|
|
| 9479 |
"shape": [
|
| 9480 |
320,
|
| 9481 |
64,
|
| 9482 |
+
80
|
| 9483 |
],
|
| 9484 |
+
"n_bytes": 3276800,
|
| 9485 |
"dtype": "torch.int16"
|
| 9486 |
}
|
| 9487 |
},
|
| 9488 |
"quant_format": "exl3",
|
| 9489 |
+
"bits_per_weight": 5
|
| 9490 |
},
|
| 9491 |
"model.layers.38.self_attn.o_proj": {
|
| 9492 |
"stored_tensors": {
|
|
|
|
| 9508 |
"shape": [
|
| 9509 |
320,
|
| 9510 |
320,
|
| 9511 |
+
64
|
| 9512 |
],
|
| 9513 |
+
"n_bytes": 13107200,
|
| 9514 |
"dtype": "torch.int16"
|
| 9515 |
}
|
| 9516 |
},
|
| 9517 |
"quant_format": "exl3",
|
| 9518 |
+
"bits_per_weight": 4
|
| 9519 |
},
|
| 9520 |
"model.layers.38.post_attention_layernorm": {
|
| 9521 |
"stored_tensors": {
|
|
|
|
| 9548 |
"shape": [
|
| 9549 |
320,
|
| 9550 |
1728,
|
| 9551 |
+
80
|
| 9552 |
],
|
| 9553 |
+
"n_bytes": 88473600,
|
| 9554 |
"dtype": "torch.int16"
|
| 9555 |
}
|
| 9556 |
},
|
| 9557 |
"quant_format": "exl3",
|
| 9558 |
+
"bits_per_weight": 5
|
| 9559 |
},
|
| 9560 |
"model.layers.38.mlp.gate_proj": {
|
| 9561 |
"stored_tensors": {
|
|
|
|
| 9653 |
"shape": [
|
| 9654 |
320,
|
| 9655 |
320,
|
| 9656 |
+
80
|
| 9657 |
],
|
| 9658 |
+
"n_bytes": 16384000,
|
| 9659 |
"dtype": "torch.int16"
|
| 9660 |
}
|
| 9661 |
},
|
| 9662 |
"quant_format": "exl3",
|
| 9663 |
+
"bits_per_weight": 5
|
| 9664 |
},
|
| 9665 |
"model.layers.39.self_attn.k_proj": {
|
| 9666 |
"stored_tensors": {
|
|
|
|
| 9689 |
"shape": [
|
| 9690 |
320,
|
| 9691 |
64,
|
| 9692 |
+
96
|
| 9693 |
],
|
| 9694 |
+
"n_bytes": 3932160,
|
| 9695 |
"dtype": "torch.int16"
|
| 9696 |
}
|
| 9697 |
},
|
| 9698 |
"quant_format": "exl3",
|
| 9699 |
+
"bits_per_weight": 6
|
| 9700 |
},
|
| 9701 |
"model.layers.39.self_attn.v_proj": {
|
| 9702 |
"stored_tensors": {
|
|
|
|
| 9725 |
"shape": [
|
| 9726 |
320,
|
| 9727 |
64,
|
| 9728 |
+
96
|
| 9729 |
],
|
| 9730 |
+
"n_bytes": 3932160,
|
| 9731 |
"dtype": "torch.int16"
|
| 9732 |
}
|
| 9733 |
},
|
| 9734 |
"quant_format": "exl3",
|
| 9735 |
+
"bits_per_weight": 6
|
| 9736 |
},
|
| 9737 |
"model.layers.39.self_attn.o_proj": {
|
| 9738 |
"stored_tensors": {
|
|
|
|
| 9754 |
"shape": [
|
| 9755 |
320,
|
| 9756 |
320,
|
| 9757 |
+
80
|
| 9758 |
],
|
| 9759 |
+
"n_bytes": 16384000,
|
| 9760 |
"dtype": "torch.int16"
|
| 9761 |
}
|
| 9762 |
},
|
| 9763 |
"quant_format": "exl3",
|
| 9764 |
+
"bits_per_weight": 5
|
| 9765 |
},
|
| 9766 |
"model.layers.39.post_attention_layernorm": {
|
| 9767 |
"stored_tensors": {
|
|
|
|
| 9794 |
"shape": [
|
| 9795 |
320,
|
| 9796 |
1728,
|
| 9797 |
+
64
|
| 9798 |
],
|
| 9799 |
+
"n_bytes": 70778880,
|
| 9800 |
"dtype": "torch.int16"
|
| 9801 |
}
|
| 9802 |
},
|
| 9803 |
"quant_format": "exl3",
|
| 9804 |
+
"bits_per_weight": 4
|
| 9805 |
},
|
| 9806 |
"model.layers.39.mlp.gate_proj": {
|
| 9807 |
"stored_tensors": {
|
|
|
|
| 10145 |
"shape": [
|
| 10146 |
320,
|
| 10147 |
320,
|
| 10148 |
+
64
|
| 10149 |
],
|
| 10150 |
+
"n_bytes": 13107200,
|
| 10151 |
"dtype": "torch.int16"
|
| 10152 |
}
|
| 10153 |
},
|
| 10154 |
"quant_format": "exl3",
|
| 10155 |
+
"bits_per_weight": 4
|
| 10156 |
},
|
| 10157 |
"model.layers.41.self_attn.k_proj": {
|
| 10158 |
"stored_tensors": {
|
|
|
|
| 10246 |
"shape": [
|
| 10247 |
320,
|
| 10248 |
320,
|
| 10249 |
+
64
|
| 10250 |
],
|
| 10251 |
+
"n_bytes": 13107200,
|
| 10252 |
"dtype": "torch.int16"
|
| 10253 |
}
|
| 10254 |
},
|
| 10255 |
"quant_format": "exl3",
|
| 10256 |
+
"bits_per_weight": 4
|
| 10257 |
},
|
| 10258 |
"model.layers.41.post_attention_layernorm": {
|
| 10259 |
"stored_tensors": {
|
|
|
|
| 10286 |
"shape": [
|
| 10287 |
320,
|
| 10288 |
1728,
|
| 10289 |
+
80
|
| 10290 |
],
|
| 10291 |
+
"n_bytes": 88473600,
|
| 10292 |
"dtype": "torch.int16"
|
| 10293 |
}
|
| 10294 |
},
|
| 10295 |
"quant_format": "exl3",
|
| 10296 |
+
"bits_per_weight": 5
|
| 10297 |
},
|
| 10298 |
"model.layers.41.mlp.gate_proj": {
|
| 10299 |
"stored_tensors": {
|
|
|
|
| 10391 |
"shape": [
|
| 10392 |
320,
|
| 10393 |
320,
|
| 10394 |
+
80
|
| 10395 |
],
|
| 10396 |
+
"n_bytes": 16384000,
|
| 10397 |
"dtype": "torch.int16"
|
| 10398 |
}
|
| 10399 |
},
|
| 10400 |
"quant_format": "exl3",
|
| 10401 |
+
"bits_per_weight": 5
|
| 10402 |
},
|
| 10403 |
"model.layers.42.self_attn.k_proj": {
|
| 10404 |
"stored_tensors": {
|
|
|
|
| 10427 |
"shape": [
|
| 10428 |
320,
|
| 10429 |
64,
|
| 10430 |
+
96
|
| 10431 |
],
|
| 10432 |
+
"n_bytes": 3932160,
|
| 10433 |
"dtype": "torch.int16"
|
| 10434 |
}
|
| 10435 |
},
|
| 10436 |
"quant_format": "exl3",
|
| 10437 |
+
"bits_per_weight": 6
|
| 10438 |
},
|
| 10439 |
"model.layers.42.self_attn.v_proj": {
|
| 10440 |
"stored_tensors": {
|
|
|
|
| 10492 |
"shape": [
|
| 10493 |
320,
|
| 10494 |
320,
|
| 10495 |
+
80
|
| 10496 |
],
|
| 10497 |
+
"n_bytes": 16384000,
|
| 10498 |
"dtype": "torch.int16"
|
| 10499 |
}
|
| 10500 |
},
|
| 10501 |
"quant_format": "exl3",
|
| 10502 |
+
"bits_per_weight": 5
|
| 10503 |
},
|
| 10504 |
"model.layers.42.post_attention_layernorm": {
|
| 10505 |
"stored_tensors": {
|
|
|
|
| 10532 |
"shape": [
|
| 10533 |
320,
|
| 10534 |
1728,
|
| 10535 |
+
64
|
| 10536 |
],
|
| 10537 |
+
"n_bytes": 70778880,
|
| 10538 |
"dtype": "torch.int16"
|
| 10539 |
}
|
| 10540 |
},
|
| 10541 |
"quant_format": "exl3",
|
| 10542 |
+
"bits_per_weight": 4
|
| 10543 |
},
|
| 10544 |
"model.layers.42.mlp.gate_proj": {
|
| 10545 |
"stored_tensors": {
|
|
|
|
| 10883 |
"shape": [
|
| 10884 |
320,
|
| 10885 |
320,
|
| 10886 |
+
64
|
| 10887 |
],
|
| 10888 |
+
"n_bytes": 13107200,
|
| 10889 |
"dtype": "torch.int16"
|
| 10890 |
}
|
| 10891 |
},
|
| 10892 |
"quant_format": "exl3",
|
| 10893 |
+
"bits_per_weight": 4
|
| 10894 |
},
|
| 10895 |
"model.layers.44.self_attn.k_proj": {
|
| 10896 |
"stored_tensors": {
|
|
|
|
| 10984 |
"shape": [
|
| 10985 |
320,
|
| 10986 |
320,
|
| 10987 |
+
64
|
| 10988 |
],
|
| 10989 |
+
"n_bytes": 13107200,
|
| 10990 |
"dtype": "torch.int16"
|
| 10991 |
}
|
| 10992 |
},
|
| 10993 |
"quant_format": "exl3",
|
| 10994 |
+
"bits_per_weight": 4
|
| 10995 |
},
|
| 10996 |
"model.layers.44.post_attention_layernorm": {
|
| 10997 |
"stored_tensors": {
|
|
|
|
| 11024 |
"shape": [
|
| 11025 |
320,
|
| 11026 |
1728,
|
| 11027 |
+
80
|
| 11028 |
],
|
| 11029 |
+
"n_bytes": 88473600,
|
| 11030 |
"dtype": "torch.int16"
|
| 11031 |
}
|
| 11032 |
},
|
| 11033 |
"quant_format": "exl3",
|
| 11034 |
+
"bits_per_weight": 5
|
| 11035 |
},
|
| 11036 |
"model.layers.44.mlp.gate_proj": {
|
| 11037 |
"stored_tensors": {
|
|
|
|
| 11129 |
"shape": [
|
| 11130 |
320,
|
| 11131 |
320,
|
| 11132 |
+
80
|
| 11133 |
],
|
| 11134 |
+
"n_bytes": 16384000,
|
| 11135 |
"dtype": "torch.int16"
|
| 11136 |
}
|
| 11137 |
},
|
| 11138 |
"quant_format": "exl3",
|
| 11139 |
+
"bits_per_weight": 5
|
| 11140 |
},
|
| 11141 |
"model.layers.45.self_attn.k_proj": {
|
| 11142 |
"stored_tensors": {
|
|
|
|
| 11165 |
"shape": [
|
| 11166 |
320,
|
| 11167 |
64,
|
| 11168 |
+
96
|
| 11169 |
],
|
| 11170 |
+
"n_bytes": 3932160,
|
| 11171 |
"dtype": "torch.int16"
|
| 11172 |
}
|
| 11173 |
},
|
| 11174 |
"quant_format": "exl3",
|
| 11175 |
+
"bits_per_weight": 6
|
| 11176 |
},
|
| 11177 |
"model.layers.45.self_attn.v_proj": {
|
| 11178 |
"stored_tensors": {
|
|
|
|
| 11230 |
"shape": [
|
| 11231 |
320,
|
| 11232 |
320,
|
| 11233 |
+
80
|
| 11234 |
],
|
| 11235 |
+
"n_bytes": 16384000,
|
| 11236 |
"dtype": "torch.int16"
|
| 11237 |
}
|
| 11238 |
},
|
| 11239 |
"quant_format": "exl3",
|
| 11240 |
+
"bits_per_weight": 5
|
| 11241 |
},
|
| 11242 |
"model.layers.45.post_attention_layernorm": {
|
| 11243 |
"stored_tensors": {
|
|
|
|
| 11270 |
"shape": [
|
| 11271 |
320,
|
| 11272 |
1728,
|
| 11273 |
+
64
|
| 11274 |
],
|
| 11275 |
+
"n_bytes": 70778880,
|
| 11276 |
"dtype": "torch.int16"
|
| 11277 |
}
|
| 11278 |
},
|
| 11279 |
"quant_format": "exl3",
|
| 11280 |
+
"bits_per_weight": 4
|
| 11281 |
},
|
| 11282 |
"model.layers.45.mlp.gate_proj": {
|
| 11283 |
"stored_tensors": {
|
|
|
|
| 11657 |
"shape": [
|
| 11658 |
320,
|
| 11659 |
64,
|
| 11660 |
+
96
|
| 11661 |
],
|
| 11662 |
+
"n_bytes": 3932160,
|
| 11663 |
"dtype": "torch.int16"
|
| 11664 |
}
|
| 11665 |
},
|
| 11666 |
"quant_format": "exl3",
|
| 11667 |
+
"bits_per_weight": 6
|
| 11668 |
},
|
| 11669 |
"model.layers.47.self_attn.v_proj": {
|
| 11670 |
"stored_tensors": {
|
|
|
|
| 11693 |
"shape": [
|
| 11694 |
320,
|
| 11695 |
64,
|
| 11696 |
+
96
|
| 11697 |
],
|
| 11698 |
+
"n_bytes": 3932160,
|
| 11699 |
"dtype": "torch.int16"
|
| 11700 |
}
|
| 11701 |
},
|
| 11702 |
"quant_format": "exl3",
|
| 11703 |
+
"bits_per_weight": 6
|
| 11704 |
},
|
| 11705 |
"model.layers.47.self_attn.o_proj": {
|
| 11706 |
"stored_tensors": {
|
|
|
|
| 12395 |
"shape": [
|
| 12396 |
320,
|
| 12397 |
64,
|
| 12398 |
+
80
|
| 12399 |
],
|
| 12400 |
+
"n_bytes": 3276800,
|
| 12401 |
"dtype": "torch.int16"
|
| 12402 |
}
|
| 12403 |
},
|
| 12404 |
"quant_format": "exl3",
|
| 12405 |
+
"bits_per_weight": 5
|
| 12406 |
},
|
| 12407 |
"model.layers.50.self_attn.v_proj": {
|
| 12408 |
"stored_tensors": {
|
|
|
|
| 12431 |
"shape": [
|
| 12432 |
320,
|
| 12433 |
64,
|
| 12434 |
+
80
|
| 12435 |
],
|
| 12436 |
+
"n_bytes": 3276800,
|
| 12437 |
"dtype": "torch.int16"
|
| 12438 |
}
|
| 12439 |
},
|
| 12440 |
"quant_format": "exl3",
|
| 12441 |
+
"bits_per_weight": 5
|
| 12442 |
},
|
| 12443 |
"model.layers.50.self_attn.o_proj": {
|
| 12444 |
"stored_tensors": {
|
|
|
|
| 13133 |
"shape": [
|
| 13134 |
320,
|
| 13135 |
64,
|
| 13136 |
+
96
|
| 13137 |
],
|
| 13138 |
+
"n_bytes": 3932160,
|
| 13139 |
"dtype": "torch.int16"
|
| 13140 |
}
|
| 13141 |
},
|
| 13142 |
"quant_format": "exl3",
|
| 13143 |
+
"bits_per_weight": 6
|
| 13144 |
},
|
| 13145 |
"model.layers.53.self_attn.v_proj": {
|
| 13146 |
"stored_tensors": {
|
|
|
|
| 13589 |
"shape": [
|
| 13590 |
320,
|
| 13591 |
320,
|
| 13592 |
+
80
|
| 13593 |
],
|
| 13594 |
+
"n_bytes": 16384000,
|
| 13595 |
"dtype": "torch.int16"
|
| 13596 |
}
|
| 13597 |
},
|
| 13598 |
"quant_format": "exl3",
|
| 13599 |
+
"bits_per_weight": 5
|
| 13600 |
},
|
| 13601 |
"model.layers.55.self_attn.k_proj": {
|
| 13602 |
"stored_tensors": {
|
|
|
|
| 13625 |
"shape": [
|
| 13626 |
320,
|
| 13627 |
64,
|
| 13628 |
+
96
|
| 13629 |
],
|
| 13630 |
+
"n_bytes": 3932160,
|
| 13631 |
"dtype": "torch.int16"
|
| 13632 |
}
|
| 13633 |
},
|
| 13634 |
"quant_format": "exl3",
|
| 13635 |
+
"bits_per_weight": 6
|
| 13636 |
},
|
| 13637 |
"model.layers.55.self_attn.v_proj": {
|
| 13638 |
"stored_tensors": {
|
|
|
|
| 13661 |
"shape": [
|
| 13662 |
320,
|
| 13663 |
64,
|
| 13664 |
+
96
|
| 13665 |
],
|
| 13666 |
+
"n_bytes": 3932160,
|
| 13667 |
"dtype": "torch.int16"
|
| 13668 |
}
|
| 13669 |
},
|
| 13670 |
"quant_format": "exl3",
|
| 13671 |
+
"bits_per_weight": 6
|
| 13672 |
},
|
| 13673 |
"model.layers.55.self_attn.o_proj": {
|
| 13674 |
"stored_tensors": {
|
|
|
|
| 13690 |
"shape": [
|
| 13691 |
320,
|
| 13692 |
320,
|
| 13693 |
+
80
|
| 13694 |
],
|
| 13695 |
+
"n_bytes": 16384000,
|
| 13696 |
"dtype": "torch.int16"
|
| 13697 |
}
|
| 13698 |
},
|
| 13699 |
"quant_format": "exl3",
|
| 13700 |
+
"bits_per_weight": 5
|
| 13701 |
},
|
| 13702 |
"model.layers.55.post_attention_layernorm": {
|
| 13703 |
"stored_tensors": {
|
|
|
|
| 13730 |
"shape": [
|
| 13731 |
320,
|
| 13732 |
1728,
|
| 13733 |
+
64
|
| 13734 |
],
|
| 13735 |
+
"n_bytes": 70778880,
|
| 13736 |
"dtype": "torch.int16"
|
| 13737 |
}
|
| 13738 |
},
|
| 13739 |
"quant_format": "exl3",
|
| 13740 |
+
"bits_per_weight": 4
|
| 13741 |
},
|
| 13742 |
"model.layers.55.mlp.gate_proj": {
|
| 13743 |
"stored_tensors": {
|
|
|
|
| 13835 |
"shape": [
|
| 13836 |
320,
|
| 13837 |
320,
|
| 13838 |
+
64
|
| 13839 |
],
|
| 13840 |
+
"n_bytes": 13107200,
|
| 13841 |
"dtype": "torch.int16"
|
| 13842 |
}
|
| 13843 |
},
|
| 13844 |
"quant_format": "exl3",
|
| 13845 |
+
"bits_per_weight": 4
|
| 13846 |
},
|
| 13847 |
"model.layers.56.self_attn.k_proj": {
|
| 13848 |
"stored_tensors": {
|
|
|
|
| 13936 |
"shape": [
|
| 13937 |
320,
|
| 13938 |
320,
|
| 13939 |
+
64
|
| 13940 |
],
|
| 13941 |
+
"n_bytes": 13107200,
|
| 13942 |
"dtype": "torch.int16"
|
| 13943 |
}
|
| 13944 |
},
|
| 13945 |
"quant_format": "exl3",
|
| 13946 |
+
"bits_per_weight": 4
|
| 13947 |
},
|
| 13948 |
"model.layers.56.post_attention_layernorm": {
|
| 13949 |
"stored_tensors": {
|
|
|
|
| 13976 |
"shape": [
|
| 13977 |
320,
|
| 13978 |
1728,
|
| 13979 |
+
80
|
| 13980 |
],
|
| 13981 |
+
"n_bytes": 88473600,
|
| 13982 |
"dtype": "torch.int16"
|
| 13983 |
}
|
| 13984 |
},
|
| 13985 |
"quant_format": "exl3",
|
| 13986 |
+
"bits_per_weight": 5
|
| 13987 |
},
|
| 13988 |
"model.layers.56.mlp.gate_proj": {
|
| 13989 |
"stored_tensors": {
|
|
|
|
| 14327 |
"shape": [
|
| 14328 |
320,
|
| 14329 |
320,
|
| 14330 |
+
80
|
| 14331 |
],
|
| 14332 |
+
"n_bytes": 16384000,
|
| 14333 |
"dtype": "torch.int16"
|
| 14334 |
}
|
| 14335 |
},
|
| 14336 |
"quant_format": "exl3",
|
| 14337 |
+
"bits_per_weight": 5
|
| 14338 |
},
|
| 14339 |
"model.layers.58.self_attn.k_proj": {
|
| 14340 |
"stored_tensors": {
|
|
|
|
| 14363 |
"shape": [
|
| 14364 |
320,
|
| 14365 |
64,
|
| 14366 |
+
96
|
| 14367 |
],
|
| 14368 |
+
"n_bytes": 3932160,
|
| 14369 |
"dtype": "torch.int16"
|
| 14370 |
}
|
| 14371 |
},
|
| 14372 |
"quant_format": "exl3",
|
| 14373 |
+
"bits_per_weight": 6
|
| 14374 |
},
|
| 14375 |
"model.layers.58.self_attn.v_proj": {
|
| 14376 |
"stored_tensors": {
|
|
|
|
| 14428 |
"shape": [
|
| 14429 |
320,
|
| 14430 |
320,
|
| 14431 |
+
80
|
| 14432 |
],
|
| 14433 |
+
"n_bytes": 16384000,
|
| 14434 |
"dtype": "torch.int16"
|
| 14435 |
}
|
| 14436 |
},
|
| 14437 |
"quant_format": "exl3",
|
| 14438 |
+
"bits_per_weight": 5
|
| 14439 |
},
|
| 14440 |
"model.layers.58.post_attention_layernorm": {
|
| 14441 |
"stored_tensors": {
|
|
|
|
| 14468 |
"shape": [
|
| 14469 |
320,
|
| 14470 |
1728,
|
| 14471 |
+
64
|
| 14472 |
],
|
| 14473 |
+
"n_bytes": 70778880,
|
| 14474 |
"dtype": "torch.int16"
|
| 14475 |
}
|
| 14476 |
},
|
| 14477 |
"quant_format": "exl3",
|
| 14478 |
+
"bits_per_weight": 4
|
| 14479 |
},
|
| 14480 |
"model.layers.58.mlp.gate_proj": {
|
| 14481 |
"stored_tensors": {
|
|
|
|
| 14573 |
"shape": [
|
| 14574 |
320,
|
| 14575 |
320,
|
| 14576 |
+
64
|
| 14577 |
],
|
| 14578 |
+
"n_bytes": 13107200,
|
| 14579 |
"dtype": "torch.int16"
|
| 14580 |
}
|
| 14581 |
},
|
| 14582 |
"quant_format": "exl3",
|
| 14583 |
+
"bits_per_weight": 4
|
| 14584 |
},
|
| 14585 |
"model.layers.59.self_attn.k_proj": {
|
| 14586 |
"stored_tensors": {
|
|
|
|
| 14674 |
"shape": [
|
| 14675 |
320,
|
| 14676 |
320,
|
| 14677 |
+
64
|
| 14678 |
],
|
| 14679 |
+
"n_bytes": 13107200,
|
| 14680 |
"dtype": "torch.int16"
|
| 14681 |
}
|
| 14682 |
},
|
| 14683 |
"quant_format": "exl3",
|
| 14684 |
+
"bits_per_weight": 4
|
| 14685 |
},
|
| 14686 |
"model.layers.59.post_attention_layernorm": {
|
| 14687 |
"stored_tensors": {
|
|
|
|
| 14714 |
"shape": [
|
| 14715 |
320,
|
| 14716 |
1728,
|
| 14717 |
+
80
|
| 14718 |
],
|
| 14719 |
+
"n_bytes": 88473600,
|
| 14720 |
"dtype": "torch.int16"
|
| 14721 |
}
|
| 14722 |
},
|
| 14723 |
"quant_format": "exl3",
|
| 14724 |
+
"bits_per_weight": 5
|
| 14725 |
},
|
| 14726 |
"model.layers.59.mlp.gate_proj": {
|
| 14727 |
"stored_tensors": {
|
|
|
|
| 15065 |
"shape": [
|
| 15066 |
320,
|
| 15067 |
320,
|
| 15068 |
+
80
|
| 15069 |
],
|
| 15070 |
+
"n_bytes": 16384000,
|
| 15071 |
"dtype": "torch.int16"
|
| 15072 |
}
|
| 15073 |
},
|
| 15074 |
"quant_format": "exl3",
|
| 15075 |
+
"bits_per_weight": 5
|
| 15076 |
},
|
| 15077 |
"model.layers.61.self_attn.k_proj": {
|
| 15078 |
"stored_tensors": {
|
|
|
|
| 15101 |
"shape": [
|
| 15102 |
320,
|
| 15103 |
64,
|
| 15104 |
+
96
|
| 15105 |
],
|
| 15106 |
+
"n_bytes": 3932160,
|
| 15107 |
"dtype": "torch.int16"
|
| 15108 |
}
|
| 15109 |
},
|
| 15110 |
"quant_format": "exl3",
|
| 15111 |
+
"bits_per_weight": 6
|
| 15112 |
},
|
| 15113 |
"model.layers.61.self_attn.v_proj": {
|
| 15114 |
"stored_tensors": {
|
|
|
|
| 15166 |
"shape": [
|
| 15167 |
320,
|
| 15168 |
320,
|
| 15169 |
+
80
|
| 15170 |
],
|
| 15171 |
+
"n_bytes": 16384000,
|
| 15172 |
"dtype": "torch.int16"
|
| 15173 |
}
|
| 15174 |
},
|
| 15175 |
"quant_format": "exl3",
|
| 15176 |
+
"bits_per_weight": 5
|
| 15177 |
},
|
| 15178 |
"model.layers.61.post_attention_layernorm": {
|
| 15179 |
"stored_tensors": {
|
|
|
|
| 15206 |
"shape": [
|
| 15207 |
320,
|
| 15208 |
1728,
|
| 15209 |
+
64
|
| 15210 |
],
|
| 15211 |
+
"n_bytes": 70778880,
|
| 15212 |
"dtype": "torch.int16"
|
| 15213 |
}
|
| 15214 |
},
|
| 15215 |
"quant_format": "exl3",
|
| 15216 |
+
"bits_per_weight": 4
|
| 15217 |
},
|
| 15218 |
"model.layers.61.mlp.gate_proj": {
|
| 15219 |
"stored_tensors": {
|
|
|
|
| 15311 |
"shape": [
|
| 15312 |
320,
|
| 15313 |
320,
|
| 15314 |
+
64
|
| 15315 |
],
|
| 15316 |
+
"n_bytes": 13107200,
|
| 15317 |
"dtype": "torch.int16"
|
| 15318 |
}
|
| 15319 |
},
|
| 15320 |
"quant_format": "exl3",
|
| 15321 |
+
"bits_per_weight": 4
|
| 15322 |
},
|
| 15323 |
"model.layers.62.self_attn.k_proj": {
|
| 15324 |
"stored_tensors": {
|
|
|
|
| 15347 |
"shape": [
|
| 15348 |
320,
|
| 15349 |
64,
|
| 15350 |
+
80
|
| 15351 |
],
|
| 15352 |
+
"n_bytes": 3276800,
|
| 15353 |
"dtype": "torch.int16"
|
| 15354 |
}
|
| 15355 |
},
|
| 15356 |
"quant_format": "exl3",
|
| 15357 |
+
"bits_per_weight": 5
|
| 15358 |
},
|
| 15359 |
"model.layers.62.self_attn.v_proj": {
|
| 15360 |
"stored_tensors": {
|
|
|
|
| 15383 |
"shape": [
|
| 15384 |
320,
|
| 15385 |
64,
|
| 15386 |
+
80
|
| 15387 |
],
|
| 15388 |
+
"n_bytes": 3276800,
|
| 15389 |
"dtype": "torch.int16"
|
| 15390 |
}
|
| 15391 |
},
|
| 15392 |
"quant_format": "exl3",
|
| 15393 |
+
"bits_per_weight": 5
|
| 15394 |
},
|
| 15395 |
"model.layers.62.self_attn.o_proj": {
|
| 15396 |
"stored_tensors": {
|
|
|
|
| 15412 |
"shape": [
|
| 15413 |
320,
|
| 15414 |
320,
|
| 15415 |
+
64
|
| 15416 |
],
|
| 15417 |
+
"n_bytes": 13107200,
|
| 15418 |
"dtype": "torch.int16"
|
| 15419 |
}
|
| 15420 |
},
|
| 15421 |
"quant_format": "exl3",
|
| 15422 |
+
"bits_per_weight": 4
|
| 15423 |
},
|
| 15424 |
"model.layers.62.post_attention_layernorm": {
|
| 15425 |
"stored_tensors": {
|
|
|
|
| 15452 |
"shape": [
|
| 15453 |
320,
|
| 15454 |
1728,
|
| 15455 |
+
80
|
| 15456 |
],
|
| 15457 |
+
"n_bytes": 88473600,
|
| 15458 |
"dtype": "torch.int16"
|
| 15459 |
}
|
| 15460 |
},
|
| 15461 |
"quant_format": "exl3",
|
| 15462 |
+
"bits_per_weight": 5
|
| 15463 |
},
|
| 15464 |
"model.layers.62.mlp.gate_proj": {
|
| 15465 |
"stored_tensors": {
|
|
|
|
| 15557 |
"shape": [
|
| 15558 |
320,
|
| 15559 |
320,
|
| 15560 |
+
80
|
| 15561 |
],
|
| 15562 |
+
"n_bytes": 16384000,
|
| 15563 |
"dtype": "torch.int16"
|
| 15564 |
}
|
| 15565 |
},
|
| 15566 |
"quant_format": "exl3",
|
| 15567 |
+
"bits_per_weight": 5
|
| 15568 |
},
|
| 15569 |
"model.layers.63.self_attn.k_proj": {
|
| 15570 |
"stored_tensors": {
|
|
|
|
| 15593 |
"shape": [
|
| 15594 |
320,
|
| 15595 |
64,
|
| 15596 |
+
96
|
| 15597 |
],
|
| 15598 |
+
"n_bytes": 3932160,
|
| 15599 |
"dtype": "torch.int16"
|
| 15600 |
}
|
| 15601 |
},
|
| 15602 |
"quant_format": "exl3",
|
| 15603 |
+
"bits_per_weight": 6
|
| 15604 |
},
|
| 15605 |
"model.layers.63.self_attn.v_proj": {
|
| 15606 |
"stored_tensors": {
|
|
|
|
| 15629 |
"shape": [
|
| 15630 |
320,
|
| 15631 |
64,
|
| 15632 |
+
96
|
| 15633 |
],
|
| 15634 |
+
"n_bytes": 3932160,
|
| 15635 |
"dtype": "torch.int16"
|
| 15636 |
}
|
| 15637 |
},
|
| 15638 |
"quant_format": "exl3",
|
| 15639 |
+
"bits_per_weight": 6
|
| 15640 |
},
|
| 15641 |
"model.layers.63.self_attn.o_proj": {
|
| 15642 |
"stored_tensors": {
|
|
|
|
| 15658 |
"shape": [
|
| 15659 |
320,
|
| 15660 |
320,
|
| 15661 |
+
80
|
| 15662 |
],
|
| 15663 |
+
"n_bytes": 16384000,
|
| 15664 |
"dtype": "torch.int16"
|
| 15665 |
}
|
| 15666 |
},
|
| 15667 |
"quant_format": "exl3",
|
| 15668 |
+
"bits_per_weight": 5
|
| 15669 |
},
|
| 15670 |
"model.layers.63.post_attention_layernorm": {
|
| 15671 |
"stored_tensors": {
|
|
|
|
| 15698 |
"shape": [
|
| 15699 |
320,
|
| 15700 |
1728,
|
| 15701 |
+
64
|
| 15702 |
],
|
| 15703 |
+
"n_bytes": 70778880,
|
| 15704 |
"dtype": "torch.int16"
|
| 15705 |
}
|
| 15706 |
},
|
| 15707 |
"quant_format": "exl3",
|
| 15708 |
+
"bits_per_weight": 4
|
| 15709 |
},
|
| 15710 |
"model.layers.63.mlp.gate_proj": {
|
| 15711 |
"stored_tensors": {
|