Update Windows executable naming as per llama.cpp changes
Browse files — gguf-imat-for-FP16.py: +3 −3
gguf-imat-for-FP16.py
CHANGED
|
@@ -121,7 +121,7 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
|
|
| 121 |
create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name)
|
| 122 |
|
| 123 |
def create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name):
|
| 124 |
-
imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
|
| 125 |
imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
|
| 126 |
imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
|
| 127 |
if not os.path.exists(imatrix_output_dst):
|
|
@@ -142,7 +142,7 @@ def quantize_models(base_dir, model_name):
|
|
| 142 |
|
| 143 |
quantization_options = [
|
| 144 |
"IQ3_M", "IQ3_XXS",
|
| 145 |
-
"Q4_K_M", "Q4_K_S",
|
| 146 |
"Q5_K_M", "Q5_K_S",
|
| 147 |
"Q6_K",
|
| 148 |
"Q8_0"
|
|
@@ -151,7 +151,7 @@ def quantize_models(base_dir, model_name):
|
|
| 151 |
for quant_option in quantization_options:
|
| 152 |
quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
|
| 153 |
quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)
|
| 154 |
-
quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
|
| 155 |
imatrix_path = os.path.join(gguf_dir, "imatrix.dat")
|
| 156 |
|
| 157 |
subprocess.run([quantize_command, "--imatrix", imatrix_path,
|
|
|
|
| 121 |
create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name)
|
| 122 |
|
| 123 |
def create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name):
|
| 124 |
+
imatrix_exe = os.path.join(base_dir, "bin", "llama-imatrix.exe")
|
| 125 |
imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
|
| 126 |
imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
|
| 127 |
if not os.path.exists(imatrix_output_dst):
|
|
|
|
| 142 |
|
| 143 |
quantization_options = [
|
| 144 |
"IQ3_M", "IQ3_XXS",
|
| 145 |
+
"Q4_K_M", "Q4_K_S", "IQ4_XS",
|
| 146 |
"Q5_K_M", "Q5_K_S",
|
| 147 |
"Q6_K",
|
| 148 |
"Q8_0"
|
|
|
|
| 151 |
for quant_option in quantization_options:
|
| 152 |
quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
|
| 153 |
quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)
|
| 154 |
+
quantize_command = os.path.join(base_dir, "bin", "llama-quantize.exe")
|
| 155 |
imatrix_path = os.path.join(gguf_dir, "imatrix.dat")
|
| 156 |
|
| 157 |
subprocess.run([quantize_command, "--imatrix", imatrix_path,
|