Update app.py
app.py CHANGED
@@ -47,11 +47,11 @@ def setup_llama_cpp():
         raise RuntimeError(f"Failed to install llama.cpp requirements. Error: {e.stderr}")
 
     if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
-        logging.info("llama.cpp
+        logging.info("llama.cpp binaries not found. Building with CMake...")
         try:
             subprocess.run(["cmake", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
-            subprocess.run(["cmake", "--build", "."
-            logging.info("
+            subprocess.run(["cmake", "--build", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
+            logging.info("llama.cpp binaries built successfully with CMake.")
         except subprocess.CalledProcessError as e:
             raise RuntimeError(f"Failed to build llama.cpp with CMake. Error: {e.stderr}")
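For reference, the build step this hunk repairs reduces to two CMake invocations run through subprocess. A minimal standalone sketch of that flow; the helper name and the default LLAMA_CPP_DIR value are illustrative, not part of the commit:

import logging
import subprocess
from pathlib import Path

LLAMA_CPP_DIR = Path("llama.cpp")  # assumed checkout location, not from the commit

def build_llama_cpp(src_dir: Path = LLAMA_CPP_DIR) -> None:
    """Configure and build llama.cpp in-tree, mirroring the two calls in the hunk."""
    for cmd in (["cmake", "."], ["cmake", "--build", "."]):
        try:
            subprocess.run(cmd, cwd=str(src_dir), check=True,
                           capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            # e.stderr is a str here because capture_output=True and text=True
            raise RuntimeError(f"Failed to build llama.cpp with CMake. Error: {e.stderr}") from e
    logging.info("llama.cpp binaries built successfully with CMake.")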
@@ -121,12 +121,13 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
     model_name = original_model_id.replace('/', '_')
     gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
     os.makedirs(gguf_path, exist_ok=True)
-    f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
-    quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
+    f16_gguf_path = os.path.abspath(os.path.join(gguf_path, "model-f16.gguf"))
+    quantized_gguf_path = os.path.abspath(os.path.join(gguf_path, "model.gguf"))
+    absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
     try:
-        convert_command = ["python3", "convert.py",
+        convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
         process = subprocess.run(convert_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
-        log_stream += f"Executing llama.cpp conversion script
+        log_stream += f"Executing llama.cpp conversion script...\n{process.stdout}\n"
         if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
         quantize_map = {"q4_k_m": "Q4_K_M", "q5_k_m": "Q5_K_M", "q8_0": "Q8_0", "f16": "F16"}
         target_quant_name = quantize_map.get(quantization_strategy.lower(), "Q4_K_M")
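The switch to os.path.abspath matters because the conversion runs with cwd=str(LLAMA_CPP_DIR): any relative path handed to convert.py would resolve inside the llama.cpp checkout rather than under OUTPUT_DIR. A condensed sketch of the fixed flow; the function name and parameters are illustrative:

import os
import subprocess

def convert_to_f16_gguf(model_path_or_id: str, gguf_path: str, llama_cpp_dir: str) -> str:
    # An absolute outfile path survives the cwd switch into the llama.cpp checkout.
    f16_gguf_path = os.path.abspath(os.path.join(gguf_path, "model-f16.gguf"))
    # Local checkouts are absolutized too; bare Hub model IDs are passed through unchanged.
    model_arg = (os.path.abspath(model_path_or_id)
                 if os.path.exists(model_path_or_id) else model_path_or_id)
    subprocess.run(["python3", "convert.py", model_arg,
                    "--outfile", f16_gguf_path, "--outtype", "f16"],
                   check=True, capture_output=True, text=True, cwd=llama_cpp_dir)
    return f16_gguf_path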
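The hunk cuts off right after the strategy lookup, so the quantize call itself is outside the shown context. Assuming the binary behind LLAMA_CPP_QUANTIZE_SCRIPT follows llama.cpp's conventional quantize CLI (input GGUF, output GGUF, type name), the follow-up presumably looks roughly like this sketch:

import subprocess

def quantize_gguf(quantize_bin: str, f16_gguf_path: str,
                  quantized_gguf_path: str, target_quant_name: str) -> None:
    # Conventional llama.cpp usage: quantize <in.gguf> <out.gguf> <TYPE>,
    # e.g. TYPE = "Q4_K_M" from the quantize_map lookup above.
    subprocess.run([quantize_bin, f16_gguf_path, quantized_gguf_path, target_quant_name],
                   check=True, capture_output=True, text=True)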