Spaces:

Naphula
/

model_tools

Running

App Files Files Community

Naphula committed on Nov 26, 2025

Commit

01938b3

verified ·

1 Parent(s): e2cf457

Upload lm_head_remover.py

Browse files

Files changed (1) hide show

lm_head_remover.py +66 -0

lm_head_remover.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+import argparse
+def fix_model(input_path, output_path):
+    print(f"Loading model from {input_path}...")
+    print("This may take a moment as we load it into RAM...")
+    # Load the model in bfloat16 to match the target format
+    model = AutoModelForCausalLM.from_pretrained(
+        input_path,
+        torch_dtype=torch.bfloat16,
+        device_map="cpu", # Load to CPU to avoid VRAM OOM during save
+        trust_remote_code=True
+    )
+    # Load tokenizer to ensure it carries over
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(input_path)
+    except:
+        print("Warning: Could not load tokenizer. You may need to copy it manually.")
+        tokenizer = None
+    print("Forcing weight tying...")
+    # This is the magic command. It tells HF to treat embed_tokens and lm_head as the same object.
+    model.tie_weights()
+    # --- THE FIX FOR YOUR ERROR ---
+    # The error happens because the model config has conflicting settings.
+    # We force use_cache to True to satisfy the 'hybrid' cache implementation requirement.
+    print("Fixing Generation Config conflicts...")
+    model.config.use_cache = True
+    if model.generation_config is not None:
+        model.generation_config.use_cache = True
+    # ------------------------------
+    print(f"Saving fixed model to {output_path}...")
+    # max_shard_size="5GB" creates the standard 4-5 shard layout usually seen in 17.2GB models
+    model.save_pretrained(
+        output_path,
+        safe_serialization=True,
+        max_shard_size="5GB"
+    )
+    if tokenizer:
+        tokenizer.save_pretrained(output_path)
+    print("Done! The model should now be ~17.2GB and merge-compatible.")
+if __name__ == "__main__":
+    # --- CONFIGURATION ---
+    # Use 'r' before the string to handle backslashes correctly on Windows
+    # 1. Where is the 18.9GB model? (Current folder)
+    input_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"
+    # 2. Where do you want the fixed 17.2GB model?
+    output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
+    # ---------------------
+    import os
+    if not os.path.exists(input_model_path):
+        print(f"Error: Input path '{input_model_path}' does not exist.")
+    else:
+        fix_model(input_model_path, output_model_path)