{ "vocab_size": 50257, "embedding_dimension": 512, "num_heads": 8, "context_length": 256, "token_dropout": 0.03, "attn_dropout": 0.2, "ffn_dropout": 0.2, "qkv_bias": false, "num_layers": 8, "ff_hidden_dim": 1024, "rms_eps": 1e-06, "rms_bias": true, "theta_base": 10000.0, "num_kv_groups": 4, "num_experts": 4, "num_active_experts": 2, "moe_noise": true, "architectures": [ "GQAGPT2" ], "model_type": "customGPT_pretrain" }