linzhao-amd committed on
Commit
7b7373c
·
verified ·
1 Parent(s): 0164045

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -32,6 +32,7 @@ cd Quark/examples/torch/language_modeling/llm_ptq/
32
  python3 quantize_quark.py --model_dir "meta-llama/Llama-3.1-405B-Instruct" \
33
  --model_attn_implementation "sdpa" \
34
  --quant_scheme w_mxfp4_a_mxfp4 \
 
35
  --kv_cache_dtype fp8 \
36
  --quant_algo autosmoothquant \
37
  --min_kv_scale 1.0 \
 
32
  python3 quantize_quark.py --model_dir "meta-llama/Llama-3.1-405B-Instruct" \
33
  --model_attn_implementation "sdpa" \
34
  --quant_scheme w_mxfp4_a_mxfp4 \
35
+ --group_size 32 \
36
  --kv_cache_dtype fp8 \
37
  --quant_algo autosmoothquant \
38
  --min_kv_scale 1.0 \