Update README.md
README.md CHANGED
@@ -338,7 +338,7 @@ Note: you can change the number of prompts to be benchmarked with `--num-prompts`
 Server:
 ```Shell
 export MODEL=Qwen/Qwen3-8B
-vllm serve $MODEL --tokenizer
+vllm serve $MODEL --tokenizer $MODEL -O3
 ```
 
 Client:
@@ -351,7 +351,7 @@ python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --
 Server:
 ```Shell
 export MODEL=pytorch/Qwen3-8B-int4wo-hqq
-VLLM_DISABLE_COMPILE_CACHE=1 vllm serve $MODEL --tokenizer
+VLLM_DISABLE_COMPILE_CACHE=1 vllm serve $MODEL --tokenizer $MODEL -O3 --pt-load-map-location cuda:0
 ```
 
 Client: