broadfield-dev committed
Commit fd28273 · verified · 1 parent: c2a4575

Update app.py

Files changed (1): app.py (+8, -7)
app.py CHANGED
@@ -47,11 +47,11 @@ def setup_llama_cpp():
         raise RuntimeError(f"Failed to install llama.cpp requirements. Error: {e.stderr}")
 
     if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
-        logging.info("llama.cpp 'quantize' binary not found. Building with CMake...")
+        logging.info("llama.cpp binaries not found. Building with CMake...")
         try:
             subprocess.run(["cmake", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
-            subprocess.run(["cmake", "--build", ".", "--target", "quantize"], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
-            logging.info("'quantize' binary built successfully with CMake.")
+            subprocess.run(["cmake", "--build", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
+            logging.info("llama.cpp binaries built successfully with CMake.")
         except subprocess.CalledProcessError as e:
             raise RuntimeError(f"Failed to build llama.cpp with CMake. Error: {e.stderr}")
 
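This hunk stops pinning the build to a single `quantize` target and builds everything, which likely guards against upstream target renames (newer llama.cpp trees ship the tool as `llama-quantize` rather than `quantize`). A minimal sketch of how a caller might locate the binary after such a full build; the helper name, search strategy, and `LLAMA_CPP_DIR` default are assumptions, not part of this commit:

from pathlib import Path

LLAMA_CPP_DIR = Path("llama.cpp")  # assumed checkout location

def find_quantize_binary(root: Path = LLAMA_CPP_DIR) -> Path:
    # Hypothetical helper: after `cmake --build .` with no --target,
    # either the old name (`quantize`) or the new one (`llama-quantize`)
    # may exist depending on the llama.cpp revision, so probe for both.
    for name in ("llama-quantize", "quantize"):
        matches = [p for p in root.rglob(name) if p.is_file()]
        if matches:
            return matches[0]
    raise FileNotFoundError("no quantize binary found; build llama.cpp first")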
@@ -121,12 +121,13 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
     model_name = original_model_id.replace('/', '_')
     gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
     os.makedirs(gguf_path, exist_ok=True)
-    f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
-    quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
+    f16_gguf_path = os.path.abspath(os.path.join(gguf_path, "model-f16.gguf"))
+    quantized_gguf_path = os.path.abspath(os.path.join(gguf_path, "model.gguf"))
+    absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
     try:
-        convert_command = ["python3", "convert.py", model_path_or_id, "--outfile", f16_gguf_path, "--outtype", "f16"]
+        convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
         process = subprocess.run(convert_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
-        log_stream += f"Executing llama.cpp conversion script on '{model_path_or_id}'...\n{process.stdout}\n"
+        log_stream += f"Executing llama.cpp conversion script...\n{process.stdout}\n"
         if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
         quantize_map = {"q4_k_m": "Q4_K_M", "q5_k_m": "Q5_K_M", "q8_0": "Q8_0", "f16": "F16"}
         target_quant_name = quantize_map.get(quantization_strategy.lower(), "Q4_K_M")
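This hunk matters because the conversion subprocess runs with `cwd=str(LLAMA_CPP_DIR)`: the child resolves relative paths against that directory, so a relative `--outfile` would land inside the llama.cpp checkout instead of OUTPUT_DIR. The `os.path.exists` guard keeps Hub model IDs such as `org/model`, which are not local paths, from being rewritten by `os.path.abspath`. A minimal sketch of the pattern, with illustrative file and directory names that are not from this commit:

import os
import subprocess

def to_child_arg(path_or_id: str) -> str:
    # Only arguments that exist locally are made absolute; a Hub ID
    # like "org/model" is passed through untouched.
    return os.path.abspath(path_or_id) if os.path.exists(path_or_id) else path_or_id

# Pin the output path before handing off: the child resolves relative
# arguments against its own cwd (the llama.cpp checkout), not ours.
outfile = os.path.abspath("gguf-out/model-f16.gguf")
subprocess.run(
    ["python3", "convert.py", to_child_arg("my_model_dir"),
     "--outfile", outfile, "--outtype", "f16"],
    cwd="llama.cpp", check=True, capture_output=True, text=True,
)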
 