Update README.md
Browse files
README.md
CHANGED
@@ -196,30 +196,27 @@ lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks
|
|
196 |
|
197 |
## int4 weight only quantization with hqq (int4wo-hqq)
|
198 |
```Shell
|
199 |
-
|
|
|
|
|
|
|
200 |
```
|
201 |
|
202 |
| Benchmark | | |
|
203 |
|----------------------------------|----------------|---------------------------|
|
204 |
-
| |
|
205 |
-
| **
|
206 |
-
| mmlu
|
207 |
-
| mmlu_pro
|
208 |
-
|
|
209 |
-
| arc_challenge (0-shot) | 56.91 | 54.86 |
|
210 |
-
| gpqa_main_zeroshot | 30.13 | 30.58 |
|
211 |
-
| HellaSwag | 54.57 | 53.54 |
|
212 |
-
| openbookqa | 33.00 | 34.40 |
|
213 |
-
| piqa (0-shot) | 77.64 | 76.33 |
|
214 |
-
| social_iqa | 49.59 | 47.90 |
|
215 |
-
| truthfulqa_mc2 (0-shot) | 48.39 | 46.44 |
|
216 |
-
| winogrande (0-shot) | 71.11 | 71.51 |
|
217 |
| **Multilingual** | | |
|
218 |
-
| mgsm_en_cot_en |
|
|
|
219 |
| **Math** | | |
|
220 |
-
|
|
221 |
-
|
|
222 |
-
|
|
|
|
223 |
|
224 |
|
225 |
# Peak Memory Usage
|
|
|
196 |
|
197 |
## int4 weight only quantization with hqq (int4wo-hqq)
|
198 |
```Shell
|
199 |
+
export MODEL=pytorch/Qwen3-8B-int4wo-hqq
|
200 |
+
# or
|
201 |
+
# export MODEL=Qwen/Qwen3-8B
|
202 |
+
lm_eval --model hf --model_args pretrained=$MODEL --tasks hellaswag --device cuda:0 --batch_size 8
|
203 |
```
|
204 |
|
205 |
| Benchmark | | |
|
206 |
|----------------------------------|----------------|---------------------------|
|
207 |
+
| | Qwen3-8B | Qwen3-8B-int4wo-hqq |
|
208 |
+
| **General** | | |
|
209 |
+
| mmlu | 73.04 | 70.4 |
|
210 |
+
| mmlu_pro | 53.81 | 52.79 |
|
211 |
+
| bbh | 79.33 | WIP |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
| **Multilingual** | | |
|
213 |
+
| mgsm_en_cot_en | 39.6 | 33.2 |
|
214 |
+
| m_mmlu | WIP | WIP |
|
215 |
| **Math** | | |
|
216 |
+
| gpqa_main_zeroshot | 35.71 | 32.14 |
|
217 |
+
| gsm8k | 87.79 | 86.28 |
|
218 |
+
| leaderboard_math_hard | WIP | WIP |
|
219 |
+
| **Overall** | WIP | WIP |
|
220 |
|
221 |
|
222 |
# Peak Memory Usage
|