| { | |
| "model_type": "phi", | |
| "architecture": "MiniMax-M2", | |
| "vocab_size": 51200, | |
| "max_position_embeddings": 32768, | |
| "num_attention_heads": 16, | |
| "num_key_value_heads": 2, | |
| "num_hidden_layers": 36, | |
| "intermediate_size": 8192, | |
| "hidden_size": 2048, | |
| "rms_norm_epsilon": 1e-6, | |
| "rope_theta": 10000.0, | |
| "pad_token_id": 50256, | |
| "eos_token_id": 50256, | |
| "bos_token_id": 50256, | |
| "torch_dtype": "float16", | |
| "model_specifics": { | |
| "total_parameters": 3090000000, | |
| "non_embedding_parameters": 2770000000, | |
| "embedding_parameters": 320000000, | |
| "parameter_percentage": { | |
| "embedding_layer": 0.104, | |
| "transformer_layers": 0.893, | |
| "layer_norm": 0.003 | |
| } | |
| }, | |
| "optimization_config": { | |
| "quantization": { | |
| "supported_formats": ["fp32", "fp16", "int8", "int4"], | |
| "recommended": { | |
| "memory_optimized": "int8", | |
| "performance_optimized": "fp16", | |
| "memory_constrained": "int4" | |
| } | |
| }, | |
| "memory_requirements": { | |
| "fp32": 12.0, | |
| "fp16": 6.0, | |
| "int8": 3.5, | |
| "int4": 2.0, | |
| "runtime_activation": 0.5 | |
| }, | |
| "inference_optimization": { | |
| "flash_attention": true, | |
| "gradient_checkpointing": true, | |
| "mixed_precision": true, | |
| "dynamic_batching": false | |
| } | |
| }, | |
| "training_config": { | |
| "base_model": "microsoft/phi-2", | |
| "context_length": 32768, | |
| "batch_size": { | |
| "train": 8, | |
| "eval": 8, | |
| "gradient_accumulation": 4 | |
| }, | |
| "learning_rate": 1e-4, | |
| "num_epochs": 3, | |
| "warmup_steps": 1000, | |
| "max_grad_norm": 1.0, | |
| "weight_decay": 0.01, | |
| "logging_steps": 100, | |
| "save_steps": 1000, | |
| "eval_steps": 1000 | |
| }, | |
| "specialization": { | |
| "primary_languages": ["javascript", "typescript", "xml", "html", "css", "mdx"], | |
| "domain_focus": "web_development", | |
| "on_device_ready": true, | |
| "memory_optimized": true, | |
| "context_extended": true | |
| }, | |
| "evaluation_targets": { | |
| "mmlu_code_score": ">60%", | |
| "humaneval": ">40%", | |
| "codebleu": ">0.65", | |
| "syntax_validity": ">95%", | |
| "semantic_coherence": ">0.80" | |
| }, | |
| "tokenization": { | |
| "base_tokenizer": "microsoft/phi-2", | |
| "tokenizer_max_length": 32768, | |
| "special_tokens": { | |
| "javascript": ["<js>", "</js>", "<function>", "</function>", "<react>", "</react>"], | |
| "xml": ["<xml>", "</xml>", "<element>", "</element>", "<config>", "</config>"], | |
| "mdx": ["<mdx>", "</mdx>", "<component>", "</component>", "<interactive>", "</interactive>"] | |
| } | |
| }, | |
| "dataset_distribution": { | |
| "total_training_tokens": "500B", | |
| "language_distribution": { | |
| "javascript_typescript": 0.35, | |
| "xml_html": 0.25, | |
| "mdx_markdown": 0.15, | |
| "css_scss": 0.10, | |
| "other_languages": 0.15 | |
| }, | |
| "task_distribution": { | |
| "code_completion": 0.40, | |
| "instruction_following": 0.25, | |
| "code_explanation": 0.20, | |
| "generation": 0.10, | |
| "debugging": 0.05 | |
| } | |
| }, | |
| "quality_metrics": { | |
| "data_quality_threshold": 0.85, | |
| "duplication_rate_max": 0.05, | |
| "language_accuracy": 0.95, | |
| "syntax_validity_min": 0.90, | |
| "semantic_coherence_min": 0.75 | |
| }, | |
| "deployment_config": { | |
| "target_memory_gb": "6-12", | |
| "quantization_strategies": { | |
| "mobile": "int8", | |
| "edge": "int8", | |
| "desktop": "fp16", | |
| "server": "fp16" | |
| }, | |
| "inference_time_target": { | |
| "512_tokens": "<100ms", | |
| "1024_tokens": "<200ms", | |
| "2048_tokens": "<400ms" | |
| } | |
| } | |
| } |