jerryzh168 committed on
Commit
ea837c9
·
verified ·
1 Parent(s): 7a964a6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -338,7 +338,7 @@ Note: you can change the number of prompts to be benchmarked with `--num-prompts
338
  Server:
339
  ```Shell
340
  export MODEL=Qwen/Qwen3-8B
341
- vllm serve $MODEL --tokenizer microsoft/Phi-4-mini-instruct -O3
342
  ```
343
 
344
  Client:
@@ -351,7 +351,7 @@ python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --
351
  Server:
352
  ```Shell
353
  export MODEL=pytorch/Qwen3-8B-int4wo-hqq
354
- VLLM_DISABLE_COMPILE_CACHE=1 vllm serve $MODEL --tokenizer microsoft/Phi-4-mini-instruct -O3 --pt-load-map-location cuda:0
355
  ```
356
 
357
  Client:
 
338
  Server:
339
  ```Shell
340
  export MODEL=Qwen/Qwen3-8B
341
+ vllm serve $MODEL --tokenizer $MODEL -O3
342
  ```
343
 
344
  Client:
 
351
  Server:
352
  ```Shell
353
  export MODEL=pytorch/Qwen3-8B-int4wo-hqq
354
+ VLLM_DISABLE_COMPILE_CACHE=1 vllm serve $MODEL --tokenizer $MODEL -O3 --pt-load-map-location cuda:0
355
  ```
356
 
357
  Client: