[Docs] NixlConnector quickstart guide (#24249)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
Signed-off-by: Peter Pan <peter.pan@daocloud.io>
Signed-off-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com>
Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com>
This commit is contained in:
Peter Pan
2025-09-23 22:23:22 +08:00
committed by GitHub
parent f05a4f0e34
commit da5e7e4329
3 changed files with 168 additions and 3 deletions

View File

@@ -85,7 +85,10 @@ run_tests_for_model() {
echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"
# Build the command with or without model-specific args
-BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
+BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID \
+UCX_NET_DEVICES=all \
+VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \
+vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \
@@ -117,7 +120,10 @@ run_tests_for_model() {
echo "Starting decode instance $i on GPU $GPU_ID, port $PORT"
# Build the command with or without model-specific args
-BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
+BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID \
+UCX_NET_DEVICES=all \
+VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \
+vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \