jerryzh168 committed on
Commit
471f957
·
verified ·
1 Parent(s): 88f3761

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -319,13 +319,13 @@ Run the benchmarks under `vllm` root folder:
319
  ### baseline
320
  ```Shell
321
  export MODEL=Qwen/Qwen3-8B
322
- python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model $MODEL --batch-size 1
323
  ```
324
 
325
  ### INT4
326
  ```Shell
327
  export MODEL=pytorch/Qwen3-8B-INT4
328
- VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model $MODEL --batch-size 1
329
  ```
330
 
331
  ## benchmark_serving
 
319
  ### baseline
320
  ```Shell
321
  export MODEL=Qwen/Qwen3-8B
322
+ vllm bench latency --input-len 256 --output-len 256 --model $MODEL --batch-size 1
323
  ```
324
 
325
  ### INT4
326
  ```Shell
327
  export MODEL=pytorch/Qwen3-8B-INT4
328
+ VLLM_DISABLE_COMPILE_CACHE=1 vllm bench latency --input-len 256 --output-len 256 --model $MODEL --batch-size 1
329
  ```
330
 
331
  ## benchmark_serving