Update README.md
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: databricks/dbrx-instruct
 license: other
 ---
-#
+# dbrx-instruct-FP8-KV
 - ## Introduction
 This model was created by applying [Quark](https://quark.docs.amd.com/latest/index.html) with calibration samples from the Pile dataset.
 - ## Quantization Strategy
@@ -18,7 +18,7 @@ export MODEL_DIR = [local model checkpoint folder] or databricks/dbrx-instruct
 # single GPU
 python3 quantize_quark.py \
     --model_dir $MODEL_DIR \
-    --output_dir
+    --output_dir dbrx-instruct-FP8-KV \
     --quant_scheme w_fp8_a_fp8 \
     --kv_cache_dtype fp8 \
     --num_calib_data 128 \
@@ -27,7 +27,7 @@ python3 quantize_quark.py \
 # If model size is too large for single GPU, please use multi GPU instead.
 python3 quantize_quark.py \
     --model_dir $MODEL_DIR \
-    --output_dir
+    --output_dir dbrx-instruct-FP8-KV \
     --quant_scheme w_fp8_a_fp8 \
     --kv_cache_dtype fp8 \
     --num_calib_data 128 \
@@ -50,7 +50,7 @@ The quantization evaluation results are conducted in pseudo-quantization mode, w
 </td>
 <td><strong>dbrx-instruct </strong>
 </td>
-<td><strong>
+<td><strong>dbrx-instruct-FP8-KV (this model)</strong>
 </td>
 </tr>
 <tr>
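For reference, a minimal sketch of the single-GPU quantization command as it reads after this change. The `MODEL_DIR` value follows the `export` line quoted in the second hunk header; the flags the README lists after `--num_calib_data` are not part of this diff, so they are omitted here rather than guessed.

```bash
# Sketch of the single-GPU run after this edit (further README flags omitted).
export MODEL_DIR=databricks/dbrx-instruct   # or a local model checkpoint folder

python3 quantize_quark.py \
    --model_dir $MODEL_DIR \
    --output_dir dbrx-instruct-FP8-KV \
    --quant_scheme w_fp8_a_fp8 \
    --kv_cache_dtype fp8 \
    --num_calib_data 128
```

The multi-GPU command in the third hunk receives the same `--output_dir dbrx-instruct-FP8-KV` value.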