# Model-config auditor: crawls local Hugging Face cache directories for
# config.json files and flags models that deviate from the Mistral Nemo
# baseline (vocab size / RoPE theta / EOS ids) before a mergekit merge.
import os
import json
import sys

# ==============================================================================
# CONFIGURATION
# ==============================================================================

# Directories crawled recursively for model folders containing a config.json.
SEARCH_ROOTS = [
    r"B:\LLM\.cache\huggingface\hub",
    r"C:\.cache"
]

# Mistral Nemo Baseline (Tekken Tokenizer)
# Reference values every scanned model's config.json is compared against.
BASELINE_VOCAB = 131072      # Nemo/Tekken vocab; Llama 3 is 128256, old Mistral 32000
BASELINE_ROPE = 1000000.0    # Nemo RoPE theta; standard Mistral uses 10,000
def find_model_paths(roots):
    """Collect every directory under *roots* that contains a config.json.

    Args:
        roots: Iterable of directory paths to crawl recursively.

    Returns:
        List of directory paths (one per discovered model). Roots that do
        not exist are reported and skipped.
    """
    discovered = []
    print("π Scanning directories for config.json...")
    for base in roots:
        if not os.path.exists(base):
            print(f"β οΈ Warning: Directory not found: {base}")
            continue
        print(f" -> Crawling {base} (this may take a moment)...")
        hits_before = len(discovered)
        for current_dir, _subdirs, filenames in os.walk(base):
            if "config.json" in filenames:
                discovered.append(current_dir)
        # Deliberately no pruning of subdirectories: the HF cache keeps
        # configs inside nested "snapshots" folders we still need to reach.
        print(f" Found {len(discovered) - hits_before} models in {base}")
    return discovered
def check_models():
    """Audit each discovered model's config.json against the Nemo baseline.

    Prints a table (name | vocab | rope theta | eos id | status) for every
    model found under SEARCH_ROOTS, then a summary of "suspects" whose
    vocab_size or rope_theta deviates from the Mistral Nemo baseline —
    those are the likely culprits for merge incompatibilities.

    Returns:
        None. All output goes to stdout.
    """
    paths = find_model_paths(SEARCH_ROOTS)
    if not paths:
        print("\nβ No models found in the specified directories.")
        return

    print("\n" + "=" * 110)
    print(f"{'Model Name (Short)':<45} | {'Vocab':<8} | {'RoPE Theta':<12} | {'EOS ID':<8} | {'Status'}")
    print("=" * 110)

    suspects = []
    for path in paths:
        config_path = os.path.join(path, "config.json")
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                cfg = json.load(f)
        except Exception as e:
            # Unreadable or malformed config: report and keep scanning.
            print(f"β Error reading {path}: {e}")
            continue

        # Extract metadata. Defaults are deliberately "wrong" values so a
        # missing key shows up as a mismatch rather than a silent pass.
        vocab_size = cfg.get("vocab_size", 0)
        rope_theta = cfg.get("rope_theta", 0.0)
        eos_id = cfg.get("eos_token_id", "N/A")

        # Clean up the name for display (handle HF cache paths).
        name = os.path.basename(path)
        if "snapshots" in path:
            # HF cache layout is .../models--org--name/snapshots/<hash>;
            # the folder two levels up gives a human-readable model name.
            try:
                parent = os.path.dirname(os.path.dirname(path))
                name = os.path.basename(parent).replace("models--", "")
            except Exception:  # fix: was a bare `except:` (caught SystemExit too)
                pass

        # --- THE AUDIT LOGIC ---
        flags = []
        is_suspect = False

        # Check vocab (the most likely cause of EOS-token bugs).
        # Mistral Nemo is 131072. Llama 3 is 128256. Old Mistral is 32000.
        if vocab_size != BASELINE_VOCAB:
            flags.append(f"VOCAB({vocab_size})")
            is_suspect = True

        # Check RoPE (Nemo is 1,000,000; standard Mistral is 10,000).
        if float(rope_theta) != float(BASELINE_ROPE):
            flags.append(f"ROPE({int(rope_theta)})")
            is_suspect = True

        # Check EOS: multiple EOS ids can confuse mergekit. Informational
        # only — it does not mark the model as a suspect.
        if isinstance(eos_id, list) and len(eos_id) > 1:
            flags.append("MULTI-EOS")

        # Fix: a lone MULTI-EOS flag was previously computed but never
        # displayed (status showed plain OK). Surface it as informational.
        if is_suspect:
            status = f"π© {' '.join(flags)}"
        elif flags:
            status = f"β OK ({' '.join(flags)})"
        else:
            status = "β OK"

        print(f"{name[:45]:<45} | {str(vocab_size):<8} | {str(rope_theta):<12} | {str(eos_id):<8} | {status}")
        if is_suspect:
            suspects.append((name, path, flags))

    print("\n" + "=" * 110)
    if suspects:
        print(f"π¨ DETECTED {len(suspects)} POTENTIALLY INCOMPATIBLE MODELS:")
        print("These models do not match the Mistral Nemo baseline (Vocab 131k, RoPE 1M).")
        print("Including them in the merge is likely causing the 'One Sentence' bug. Use vocab_resizer.py to fix.\n")
        for s_name, s_path, s_flags in suspects:
            print(f"β {s_name}")
            print(f"   Path: {s_path}")
            print(f"   Issues: {', '.join(s_flags)}\n")
    else:
        print("β All scanned models match the Mistral Nemo baseline specs.")
| if __name__ == "__main__": | |
| check_models() |