Update README.md
Browse files
README.md
CHANGED
@@ -429,4 +429,38 @@ lm_eval \
  --tasks truthfulqa \
  --num_fewshot 0 \
  --batch_size auto
```
  --tasks truthfulqa \
  --num_fewshot 0 \
  --batch_size auto
```

#### OpenLLM v2
```
lm_eval \
  --model vllm \
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w4a16",dtype=auto,max_model_len=4096,tensor_parallel_size=1,enable_chunked_prefill=True \
  --apply_chat_template \
  --fewshot_as_multiturn \
  --tasks leaderboard \
  --batch_size auto
```

#### HumanEval and HumanEval+
##### Generation
```
python3 codegen/generate.py \
  --model neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w4a16 \
  --bs 16 \
  --temperature 0.2 \
  --n_samples 50 \
  --root "." \
  --dataset humaneval
```
##### Sanitization
```
python3 evalplus/sanitize.py \
  humaneval/neuralmagic--Meta-Llama-3.1-70B-Instruct-quantized.w4a16_vllm_temp_0.2
```
##### Evaluation
```
evalplus.evaluate \
  --dataset humaneval \
  --samples humaneval/neuralmagic--Meta-Llama-3.1-70B-Instruct-quantized.w4a16_vllm_temp_0.2-sanitized
```