Morgan Funtowicz
feat(text-generation): default to transformers backend as much as we can
0727aa0
raw
history blame
353 Bytes
#!/bin/bash
# Launch the SGLang server (python3 -m sglang.launch_server), listening on
# 0.0.0.0:80.
#
# Required environment variables (each is forwarded as the value of a flag):
#   MODEL_ID        -> --model-path
#   KV_CACHE_DTYPE  -> --kv-cache-dtype
#   TP_SIZE         -> --tensor-parallel-size AND --expert-parallel-size
#   QUANT_METHOD    -> --quantization
#   MODEL_IMPL      -> --model-impl
set -euo pipefail

# Fail fast with a readable message. Without this check an unset/empty
# variable would vanish from the command line and the following flag would be
# taken as the previous flag's value.
: "${MODEL_ID:?MODEL_ID must be set and non-empty}"
: "${KV_CACHE_DTYPE:?KV_CACHE_DTYPE must be set and non-empty}"
: "${TP_SIZE:?TP_SIZE must be set and non-empty}"
: "${QUANT_METHOD:?QUANT_METHOD must be set and non-empty}"
: "${MODEL_IMPL:?MODEL_IMPL must be set and non-empty}"

# Build the argument vector as an array so every value is passed as exactly
# one word, regardless of spaces or glob characters it may contain.
server_args=(
  --model-path "$MODEL_ID"
  --kv-cache-dtype "$KV_CACHE_DTYPE"
  --tensor-parallel-size "$TP_SIZE"
  # The expert-parallel size deliberately reuses TP_SIZE, as in the original.
  --expert-parallel-size "$TP_SIZE"
  --quantization "$QUANT_METHOD"
  --enable-torch-compile
  --enable-ep-moe
  --model-impl "$MODEL_IMPL"
  --tool-call-parser qwen25
  --host 0.0.0.0
  --port 80
)

python3 -m sglang.launch_server "${server_args[@]}"