Make engine core client handshake timeout configurable (#27444)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
This commit is contained in:
Seiji Eicher
2025-12-19 12:38:30 -08:00
committed by GitHub
parent 969bbc7c61
commit 1ab5213531
3 changed files with 105 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ if TYPE_CHECKING:
LOCAL_RANK: int = 0
CUDA_VISIBLE_DEVICES: str | None = None
VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60
VLLM_ENGINE_READY_TIMEOUT_S: int = 600
VLLM_API_KEY: str | None = None
VLLM_DEBUG_LOG_API_SERVER_RESPONSE: bool = False
S3_ACCESS_KEY_ID: str | None = None
@@ -604,6 +605,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_ENGINE_ITERATION_TIMEOUT_S": lambda: int(
os.environ.get("VLLM_ENGINE_ITERATION_TIMEOUT_S", "60")
),
# Timeout in seconds to wait for engine cores to become ready during
# startup. Parsed as an integer; defaults to 600 seconds (10 minutes)
# when the variable is unset.
"VLLM_ENGINE_READY_TIMEOUT_S": lambda: int(
os.environ.get("VLLM_ENGINE_READY_TIMEOUT_S", "600")
),
# API key for vLLM API server
"VLLM_API_KEY": lambda: os.environ.get("VLLM_API_KEY", None),
# Whether to log responses from API Server for debugging