Update README.md
Browse files
README.md
CHANGED
|
@@ -14,10 +14,12 @@ pipeline_tag: text-generation
|
|
| 14 |
|
| 15 |
# 1. Inference with vLLM
|
| 16 |
```Shell
|
|
|
|
| 17 |
VLLM_DISABLE_COMPILE_CACHE=1 vllm serve SocialLocalMobile/Qwen3-32B-float8dq --tokenizer Qwen/Qwen3-32B -O3
|
| 18 |
```
|
| 19 |
|
| 20 |
```Shell
|
|
|
|
| 21 |
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 22 |
"model": "SocialLocalMobile/Qwen3-32B-float8dq",
|
| 23 |
"messages": [
|
|
|
|
| 14 |
|
| 15 |
# 1. Inference with vLLM
|
| 16 |
```Shell
|
| 17 |
+
# Server
|
| 18 |
VLLM_DISABLE_COMPILE_CACHE=1 vllm serve SocialLocalMobile/Qwen3-32B-float8dq --tokenizer Qwen/Qwen3-32B -O3
|
| 19 |
```
|
| 20 |
|
| 21 |
```Shell
|
| 22 |
+
# Client
|
| 23 |
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 24 |
"model": "SocialLocalMobile/Qwen3-32B-float8dq",
|
| 25 |
"messages": [
|