Naphula committed
Commit 01938b3 · verified · 1 Parent(s): e2cf457

Upload lm_head_remover.py

Files changed (1)
  1. lm_head_remover.py +66 -0
lm_head_remover.py ADDED
@@ -0,0 +1,66 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import os
+ import argparse
+
+ def fix_model(input_path, output_path):
+     print(f"Loading model from {input_path}...")
+     print("This may take a moment as we load it into RAM...")
+
+     # Load the model in bfloat16 to match the target format
+     model = AutoModelForCausalLM.from_pretrained(
+         input_path,
+         torch_dtype=torch.bfloat16,
+         device_map="cpu",  # Load to CPU to avoid VRAM OOM during save
+         trust_remote_code=True
+     )
+
+     # Load tokenizer to ensure it carries over
+     try:
+         tokenizer = AutoTokenizer.from_pretrained(input_path)
+     except Exception:
+         print("Warning: Could not load tokenizer. You may need to copy it manually.")
+         tokenizer = None
+
+     print("Forcing weight tying...")
+     # This is the magic command. It tells HF to treat embed_tokens and lm_head as the same object.
+     model.tie_weights()
+
+     # --- THE FIX FOR YOUR ERROR ---
+     # The error happens because the model config has conflicting settings.
+     # We force use_cache to True to satisfy the 'hybrid' cache implementation requirement.
+     print("Fixing Generation Config conflicts...")
+     model.config.use_cache = True
+     if model.generation_config is not None:
+         model.generation_config.use_cache = True
+     # ------------------------------
+
+     print(f"Saving fixed model to {output_path}...")
+     # max_shard_size="5GB" creates the standard 4-5 shard layout usually seen in 17.2GB models
+     model.save_pretrained(
+         output_path,
+         safe_serialization=True,
+         max_shard_size="5GB"
+     )
+
+     if tokenizer:
+         tokenizer.save_pretrained(output_path)
+
+     print("Done! The model should now be ~17.2GB and merge-compatible.")
+
+ if __name__ == "__main__":
+     # --- CONFIGURATION ---
+     # Use 'r' before the string to handle backslashes correctly on Windows
+
+     # 1. Where is the 18.9GB model? (Current folder)
+     input_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"
+
+     # 2. Where do you want the fixed 17.2GB model?
+     output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
+     # ---------------------
+
+     import os
+     if not os.path.exists(input_model_path):
+         print(f"Error: Input path '{input_model_path}' does not exist.")
+     else:
+         fix_model(input_model_path, output_model_path)
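
As a quick sanity check after the script finishes, you can inspect the safetensors index that save_pretrained writes next to the shards: with the weights tied, no lm_head tensor should be stored separately, and the reported total size should drop to roughly 17.2GB. The snippet below is a minimal verification sketch, not part of the uploaded script; it assumes the output was sharded (so model.safetensors.index.json exists) and reuses the output path configured above.

import json
import os

# Hypothetical check, separate from lm_head_remover.py; adjust the path if yours differs.
output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
index_file = os.path.join(output_model_path, "model.safetensors.index.json")

if os.path.exists(index_file):
    with open(index_file) as f:
        index = json.load(f)
    # With tied weights, the lm_head weight should not appear as its own entry in the shards.
    lm_head_keys = [name for name in index["weight_map"] if name.startswith("lm_head")]
    print("lm_head tensors on disk:", lm_head_keys or "none")
    print("total size (GB):", round(index["metadata"]["total_size"] / 1e9, 2))
else:
    print("No index file found; the model may have been saved as a single shard.")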