Make engine core client handshake timeout configurable (#27444)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
This commit is contained in:
@@ -24,6 +24,7 @@ if TYPE_CHECKING:
|
||||
LOCAL_RANK: int = 0
|
||||
CUDA_VISIBLE_DEVICES: str | None = None
|
||||
VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60
|
||||
VLLM_ENGINE_READY_TIMEOUT_S: int = 600
|
||||
VLLM_API_KEY: str | None = None
|
||||
VLLM_DEBUG_LOG_API_SERVER_RESPONSE: bool = False
|
||||
S3_ACCESS_KEY_ID: str | None = None
|
||||
@@ -604,6 +605,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_ENGINE_ITERATION_TIMEOUT_S": lambda: int(
|
||||
os.environ.get("VLLM_ENGINE_ITERATION_TIMEOUT_S", "60")
|
||||
),
|
||||
# Timeout in seconds for waiting for engine cores to become ready
|
||||
# during startup. Default is 600 seconds (10 minutes).
|
||||
"VLLM_ENGINE_READY_TIMEOUT_S": lambda: int(
|
||||
os.environ.get("VLLM_ENGINE_READY_TIMEOUT_S", "600")
|
||||
),
|
||||
# API key for vLLM API server
|
||||
"VLLM_API_KEY": lambda: os.environ.get("VLLM_API_KEY", None),
|
||||
# Whether to log responses from API Server for debugging
|
||||
|
||||
Reference in New Issue
Block a user