Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,8 +27,8 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
| 27 |
|
| 28 |
# --- LLAMA.CPP SETUP ---
|
| 29 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 30 |
-
## FINAL FIX: The script is
|
| 31 |
-
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
|
| 32 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 33 |
|
| 34 |
def setup_llama_cpp():
|
|
@@ -92,7 +92,7 @@ def stage_2_prune_model(model, prune_percentage: float):
|
|
| 92 |
|
| 93 |
def stage_3_4_onnx_quantize(model_path_or_id: str, onnx_quant_type: str, calibration_data_path: str):
|
| 94 |
log_stream = "[STAGE 3 & 4] Converting to ONNX and Quantizing...\n"
|
| 95 |
-
run_id = datetime.now().strftime("%
|
| 96 |
model_name = model_path_or_id.split('/')[-1]
|
| 97 |
onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
|
| 98 |
|
|
@@ -224,7 +224,7 @@ def run_amop_pipeline(model_id: str, pipeline_type: str, do_prune: bool, prune_p
|
|
| 224 |
full_log += log
|
| 225 |
|
| 226 |
full_log += "Packaging & Uploading...\n"; yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
|
| 227 |
-
final_message, log = stage_5_package_and_upload(model_id,
|
| 228 |
full_log += log
|
| 229 |
|
| 230 |
yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
|
|
|
|
| 27 |
|
| 28 |
# --- LLAMA.CPP SETUP ---
|
| 29 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 30 |
+
## FINAL FIX: The correct, stable script is in the 'tools' subdirectory.
|
| 31 |
+
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "tools" / "convert-hf-to-gguf.py"
|
| 32 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 33 |
|
| 34 |
def setup_llama_cpp():
|
|
|
|
| 92 |
|
| 93 |
def stage_3_4_onnx_quantize(model_path_or_id: str, onnx_quant_type: str, calibration_data_path: str):
|
| 94 |
log_stream = "[STAGE 3 & 4] Converting to ONNX and Quantizing...\n"
|
| 95 |
+
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 96 |
model_name = model_path_or_id.split('/')[-1]
|
| 97 |
onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
|
| 98 |
|
|
|
|
| 224 |
full_log += log
|
| 225 |
|
| 226 |
full_log += "Packaging & Uploading...\n"; yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
|
| 227 |
+
final_message, log = stage_5_package_and_upload(model_id, optimized_path, full_log, options)
|
| 228 |
full_log += log
|
| 229 |
|
| 230 |
yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
|