#!/usr/bin/env python3
"""
8-bit quantization script

Invokes the llama.cpp ``llama-quantize`` executable to convert a GGUF model
file to the ``q8_0`` quantization type.
"""
import os
import subprocess
import sys

# Defaults used when the script is run without arguments. All three are
# relative paths, so they depend on the directory the script is launched from.
DEFAULT_INPUT_PATH = "trained_models/isaac_sim_hf/gguf_final/isaac_sim_qwen2.5_coder.gguf"
DEFAULT_OUTPUT_PATH = "trained_models/isaac_sim_hf/gguf_final/isaac_sim_qwen2.5_coder_q8_0.gguf"
DEFAULT_QUANTIZE_BIN = "../../llama.cpp/build/bin/llama-quantize"


def quantize_8bit(
    input_path: str = DEFAULT_INPUT_PATH,
    output_path: str = DEFAULT_OUTPUT_PATH,
    quantize_bin: str = DEFAULT_QUANTIZE_BIN,
) -> bool:
    """Quantize a GGUF model to q8_0 by running ``llama-quantize``.

    Args:
        input_path: Path of the source GGUF file. Must already exist.
        output_path: Path the quantized GGUF file is written to.
        quantize_bin: Path of the ``llama-quantize`` executable.

    Returns:
        True if the quantizer exited with status 0; False if the input file
        is missing, the executable could not be started, or it exited with
        a non-zero status. Nothing is raised for those failure cases.
    """
    if not os.path.exists(input_path):
        print(f"❌ Input file not found: {input_path}")
        return False

    print(f"🔢 Quantizing to 8-bit: {input_path} -> {output_path}")

    cmd = [
        quantize_bin,
        input_path,
        output_path,
        "q8_0",
    ]

    try:
        # Output is captured so that stderr can be reported on failure;
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ 8-bit quantization failed: {e.stderr}")
        return False
    except OSError as e:
        # Raised when the executable itself cannot be launched (missing
        # binary, no execute permission, ...). Report it like any other
        # failure instead of crashing with a traceback.
        print(f"❌ 8-bit quantization failed: {e}")
        return False

    print("✅ 8-bit quantization completed")

    # Size reporting is informational only; a missing output file does not
    # change the return value.
    if os.path.exists(output_path):
        size_mb = os.path.getsize(output_path) / (1024 * 1024)
        print(f"📁 8-bit file size: {size_mb:.1f} MB")

    return True


if __name__ == "__main__":
    success = quantize_8bit()
    sys.exit(0 if success else 1)