NamrataThakur's picture
Initial upload of custom GPT model
883c572 verified
raw
history blame contribute delete
510 Bytes
{
"vocab_size": 50257,
"embedding_dimension": 512,
"num_heads": 8,
"context_length": 256,
"token_dropout": 0.03,
"attn_dropout": 0.2,
"ffn_dropout": 0.2,
"qkv_bias": false,
"num_layers": 8,
"ff_hidden_dim": 1024,
"rms_eps": 1e-06,
"rms_bias": true,
"theta_base": 10000.0,
"num_kv_groups": 4,
"num_experts": 4,
"num_active_experts": 2,
"moe_noise": true,
"architectures": [
"GQAGPT2"
],
"model_type": "customGPT_pretrain"
}