[V0 Deprecation] Remove VLLM_USE_V1 from docs and scripts (#26336)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-07 16:46:44 +08:00
committed by GitHub
parent 46b0779996
commit 7e4cd070b0
11 changed files with 17 additions and 26 deletions

View File

@@ -166,7 +166,7 @@ main() {
local kv_port=$((21001 + i))
echo " Prefill server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port"
-CUDA_VISIBLE_DEVICES=$gpu_id VLLM_USE_V1=1 vllm serve $MODEL \
+CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
--enforce-eager \
--host 0.0.0.0 \
--port $port \
@@ -194,7 +194,7 @@ main() {
local kv_port=$((22001 + i))
echo " Decode server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port"
-VLLM_USE_V1=1 CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
+CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
--enforce-eager \
--host 0.0.0.0 \
--port $port \

View File

@@ -55,7 +55,6 @@ done
echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALLEL_SIZE and redundant experts: $REDUNDANT_EXPERTS"
export RAY_DEDUP_LOGS=0
-export VLLM_USE_V1=1
export VLLM_ALL2ALL_BACKEND="pplx"
export VLLM_USE_DEEP_GEMM=1

View File

@@ -5,7 +5,7 @@ To run this example, you can start the vLLM server
without any specific flags:
```bash
-VLLM_USE_V1=0 vllm serve unsloth/Llama-3.2-1B-Instruct \
+vllm serve unsloth/Llama-3.2-1B-Instruct \
--structured-outputs-config.backend outlines
```

View File

@@ -36,7 +36,6 @@ llm_config = LLMConfig(
},
# Set to the node's accelerator type.
accelerator_type="H100",
-runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
# Customize engine arguments as required (for example, vLLM engine kwargs).
engine_kwargs={
"tensor_parallel_size": 8,