broadfield-dev committed on
Commit
0ed6d3e
·
verified ·
1 Parent(s): e5fb1ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -38
app.py CHANGED
@@ -11,6 +11,7 @@ from huggingface_hub import HfApi
11
  from transformers import AutoConfig, AutoModel, AutoTokenizer
12
  from optimum.onnxruntime import ORTQuantizer
13
  from optimum.onnxruntime.configuration import AutoQuantizationConfig
 
14
  import torch.nn.utils.prune as prune
15
 
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -119,24 +120,21 @@ def stage_3_4_gguf_quantize(model_path: str, model_id: str, quantization_strateg
119
  output_file = os.path.join(gguf_path, "model.gguf")
120
 
121
  try:
122
- log_stream += "Executing `optimum-gguf-cli` via subprocess...\n"
123
- export_command = [
124
- "optimum-gguf-cli",
125
- "--model", model_path,
126
- "--quantization-strategy", quantization_strategy,
127
- "--trust-remote-code",
128
- "--output", output_file
129
- ]
130
- process = subprocess.run(export_command, check=True, capture_output=True, text=True)
131
- log_stream += process.stdout
132
- if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
133
  log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
134
  return gguf_path, log_stream
135
- except subprocess.CalledProcessError as e:
136
- error_msg = f"Failed during `optimum-gguf-cli`. Error:\n{e.stderr}"
137
- logging.error(error_msg)
138
  raise RuntimeError(error_msg)
139
 
 
140
  def stage_5_package_and_upload(model_id: str, optimized_model_path: str, pipeline_log: str, options: dict):
141
  log_stream = "[STAGE 5] Packaging and Uploading...\n"
142
  if not HF_TOKEN:
@@ -286,17 +284,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
286
  ["ONNX", "GGUF"], label="Select Optimization Pipeline", info="GGUF is recommended for LLMs, ONNX for others."
287
  )
288
 
 
 
 
 
289
  with gr.Group(visible=False) as onnx_options:
290
- gr.Markdown("#### ONNX Pipeline Options")
291
- prune_checkbox_onnx = gr.Checkbox(label="Enable Pruning", value=False, info="Removes redundant weights. Applied before ONNX conversion.")
292
- prune_slider_onnx = gr.Slider(minimum=0, maximum=90, value=20, step=5, label="Pruning Percentage (%)")
293
  onnx_quant_radio = gr.Radio(["Dynamic", "Static"], label="ONNX Quantization Type", value="Dynamic", info="Static may offer better performance but requires calibration data.")
294
  calibration_file_upload = gr.File(label="Upload Calibration Data (.txt)", visible=False, file_types=['.txt'])
295
 
296
  with gr.Group(visible=False) as gguf_options:
297
- gr.Markdown("#### GGUF Pipeline Options")
298
- prune_checkbox_gguf = gr.Checkbox(label="Enable Pruning", value=False, info="Removes redundant weights. Applied before GGUF conversion.")
299
- prune_slider_gguf = gr.Slider(minimum=0, maximum=90, value=20, step=5, label="Pruning Percentage (%)")
300
  gguf_quant_dropdown = gr.Dropdown(
301
  ["q4_k_m", "q5_k_m", "q8_0", "f16"],
302
  label="GGUF Quantization Strategy",
@@ -312,31 +310,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
312
  success_box = gr.Markdown(visible=False)
313
  log_output = gr.Textbox(label="Live Logs", lines=20, interactive=False, max_lines=20)
314
 
315
- # Consolidate pruning controls and pass the correct one based on pipeline type
316
- # This requires a small change in the main run function and the Gradio UI setup
317
- prune_checkbox = gr.Checkbox(label="Enable Pruning", value=False, info="Removes redundant weights from the model.", visible=False)
318
- prune_slider = gr.Slider(minimum=0, maximum=90, value=20, step=5, label="Pruning Percentage (%)", visible=False)
319
-
320
  def update_ui_for_pipeline(pipeline_type):
321
  is_onnx = pipeline_type == "ONNX"
322
  is_gguf = pipeline_type == "GGUF"
 
323
  return {
324
  onnx_options: gr.Group(visible=is_onnx),
325
- gguf_options: gr.Group(visible=is_gguf)
 
 
326
  }
327
 
328
  def update_ui_for_quant_type(quant_type):
329
  return gr.File(visible=quant_type == "Static")
330
-
331
- def run_wrapper(model_id, pipeline_type, onnx_prune, onnx_prune_p, gguf_prune, gguf_prune_p, onnx_quant, calib_file, gguf_quant):
332
- # Logic to decide which pruning values to use
333
- do_prune = onnx_prune if pipeline_type == "ONNX" else gguf_prune
334
- prune_percent = onnx_prune_p if pipeline_type == "ONNX" else gguf_prune_p
335
-
336
- # Call the generator-based pipeline runner
337
- yield from run_amop_pipeline(model_id, pipeline_type, do_prune, prune_percent, onnx_quant, calib_file, gguf_quant)
338
 
339
- pipeline_type_radio.change(fn=update_ui_for_pipeline, inputs=pipeline_type_radio, outputs=[onnx_options, gguf_options])
340
  onnx_quant_radio.change(fn=update_ui_for_quant_type, inputs=onnx_quant_radio, outputs=[calibration_file_upload])
341
 
342
  analyze_button.click(
@@ -346,11 +334,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
346
  )
347
 
348
  run_button.click(
349
- fn=run_wrapper,
350
  inputs=[
351
  model_id_input, pipeline_type_radio,
352
- prune_checkbox_onnx, prune_slider_onnx,
353
- prune_checkbox_gguf, prune_slider_gguf,
354
  onnx_quant_radio, calibration_file_upload, gguf_quant_dropdown
355
  ],
356
  outputs=[run_button, analyze_button, final_output, log_output, success_box]
 
11
  from transformers import AutoConfig, AutoModel, AutoTokenizer
12
  from optimum.onnxruntime import ORTQuantizer
13
  from optimum.onnxruntime.configuration import AutoQuantizationConfig
14
+ from optimum.exporters.gguf import main_export as gguf_export
15
  import torch.nn.utils.prune as prune
16
 
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
120
  output_file = os.path.join(gguf_path, "model.gguf")
121
 
122
  try:
123
+ log_stream += "Calling `optimum.exporters.gguf.main_export` programmatically...\n"
124
+ gguf_export(
125
+ model_id_or_path=model_path,
126
+ output=output_file,
127
+ quantization_strategy=quantization_strategy,
128
+ trust_remote_code=True
129
+ )
 
 
 
 
130
  log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
131
  return gguf_path, log_stream
132
+ except Exception as e:
133
+ error_msg = f"Failed during GGUF conversion. Error: {e}"
134
+ logging.error(error_msg, exc_info=True)
135
  raise RuntimeError(error_msg)
136
 
137
+
138
  def stage_5_package_and_upload(model_id: str, optimized_model_path: str, pipeline_log: str, options: dict):
139
  log_stream = "[STAGE 5] Packaging and Uploading...\n"
140
  if not HF_TOKEN:
 
284
  ["ONNX", "GGUF"], label="Select Optimization Pipeline", info="GGUF is recommended for LLMs, ONNX for others."
285
  )
286
 
287
+ # Unified Pruning controls, shown/hidden by parent group
288
+ prune_checkbox = gr.Checkbox(label="Enable Pruning", value=False, info="Removes redundant weights from the model.")
289
+ prune_slider = gr.Slider(minimum=0, maximum=90, value=20, step=5, label="Pruning Percentage (%)")
290
+
291
  with gr.Group(visible=False) as onnx_options:
292
+ gr.Markdown("#### ONNX Quantization Options")
 
 
293
  onnx_quant_radio = gr.Radio(["Dynamic", "Static"], label="ONNX Quantization Type", value="Dynamic", info="Static may offer better performance but requires calibration data.")
294
  calibration_file_upload = gr.File(label="Upload Calibration Data (.txt)", visible=False, file_types=['.txt'])
295
 
296
  with gr.Group(visible=False) as gguf_options:
297
+ gr.Markdown("#### GGUF Quantization Options")
 
 
298
  gguf_quant_dropdown = gr.Dropdown(
299
  ["q4_k_m", "q5_k_m", "q8_0", "f16"],
300
  label="GGUF Quantization Strategy",
 
310
  success_box = gr.Markdown(visible=False)
311
  log_output = gr.Textbox(label="Live Logs", lines=20, interactive=False, max_lines=20)
312
 
 
 
 
 
 
313
  def update_ui_for_pipeline(pipeline_type):
314
  is_onnx = pipeline_type == "ONNX"
315
  is_gguf = pipeline_type == "GGUF"
316
+ # Pruning controls are visible for either pipeline type, but grouped logically
317
  return {
318
  onnx_options: gr.Group(visible=is_onnx),
319
+ gguf_options: gr.Group(visible=is_gguf),
320
+ prune_checkbox: gr.Checkbox(visible=is_onnx or is_gguf),
321
+ prune_slider: gr.Slider(visible=is_onnx or is_gguf)
322
  }
323
 
324
  def update_ui_for_quant_type(quant_type):
325
  return gr.File(visible=quant_type == "Static")
 
 
 
 
 
 
 
 
326
 
327
+ pipeline_type_radio.change(fn=update_ui_for_pipeline, inputs=pipeline_type_radio, outputs=[onnx_options, gguf_options, prune_checkbox, prune_slider])
328
  onnx_quant_radio.change(fn=update_ui_for_quant_type, inputs=onnx_quant_radio, outputs=[calibration_file_upload])
329
 
330
  analyze_button.click(
 
334
  )
335
 
336
  run_button.click(
337
+ fn=run_amop_pipeline,
338
  inputs=[
339
  model_id_input, pipeline_type_radio,
340
+ prune_checkbox, prune_slider,
 
341
  onnx_quant_radio, calibration_file_upload, gguf_quant_dropdown
342
  ],
343
  outputs=[run_button, analyze_button, final_output, log_output, success_box]