{
  "vocab_size": 50257,
  "embedding_dimension": 512,
  "num_heads": 8,
  "context_length": 256,
  "token_dropout": 0.03,
  "attn_dropout": 0.2,
  "ffn_dropout": 0.2,
  "qkv_bias": false,
  "num_layers": 8,
  "ff_hidden_dim": 1024,
  "rms_eps": 1e-06,
  "rms_bias": true,
  "theta_base": 10000.0,
  "num_kv_groups": 4,
  "num_experts": 4,
  "num_active_experts": 2,
  "moe_noise": true,
  "architectures": [
    "GQAGPT2"
  ],
  "model_type": "customGPT_pretrain"
}