Update README.md
Browse files
README.md
CHANGED
@@ -32,6 +32,7 @@ cd Quark/examples/torch/language_modeling/llm_ptq/
|
|
32 |
python3 quantize_quark.py --model_dir "meta-llama/Llama-3.1-405B-Instruct" \
|
33 |
--model_attn_implementation "sdpa" \
|
34 |
--quant_scheme w_mxfp4_a_mxfp4 \
|
|
|
35 |
--kv_cache_dtype fp8 \
|
36 |
--quant_algo autosmoothquant \
|
37 |
--min_kv_scale 1.0 \
|
|
|
32 |
python3 quantize_quark.py --model_dir "meta-llama/Llama-3.1-405B-Instruct" \
|
33 |
--model_attn_implementation "sdpa" \
|
34 |
--quant_scheme w_mxfp4_a_mxfp4 \
|
35 |
+
--group_size 32 \
|
36 |
--kv_cache_dtype fp8 \
|
37 |
--quant_algo autosmoothquant \
|
38 |
--min_kv_scale 1.0 \
|