diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index 1e9318796..c80db1b89 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -123,6 +123,7 @@ steps:
   - tests/test_inputs.py
   - tests/test_outputs.py
   - tests/test_pooling_params.py
+  - tests/test_ray_env.py
   - tests/multimodal
   - tests/renderers
   - tests/standalone_tests/lazy_imports.py
@@ -136,6 +137,7 @@ steps:
   - pytest -v -s test_inputs.py
   - pytest -v -s test_outputs.py
   - pytest -v -s test_pooling_params.py
+  - pytest -v -s test_ray_env.py
   - pytest -v -s -m 'cpu_test' multimodal
   - pytest -v -s renderers
   - pytest -v -s tokenizers_
diff --git a/tests/test_ray_env.py b/tests/test_ray_env.py
new file mode 100644
index 000000000..c08f088ac
--- /dev/null
+++ b/tests/test_ray_env.py
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for vllm.ray.ray_env — env var propagation to Ray workers."""
+
+import os
+from unittest.mock import patch
+
+from vllm.ray.ray_env import get_env_vars_to_copy
+
+# ---------------------------------------------------------------------------
+# Default prefix matching
+# ---------------------------------------------------------------------------
+
+
+class TestDefaultPrefixes:
+    """Built-in prefixes (VLLM_, LMCACHE_, NCCL_, UCX_, HF_, HUGGING_FACE_)
+    should be forwarded without any extra configuration."""
+
+    @patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
+    def test_lmcache_prefix(self):
+        result = get_env_vars_to_copy()
+        assert "LMCACHE_LOCAL_CPU" in result
+
+    @patch.dict(os.environ, {"NCCL_DEBUG": "INFO"}, clear=False)
+    def test_nccl_prefix(self):
+        result = get_env_vars_to_copy()
+        assert "NCCL_DEBUG" in result
+
+    @patch.dict(os.environ, {"UCX_TLS": "rc"}, clear=False)
+    def test_ucx_prefix(self):
+        result = get_env_vars_to_copy()
+        assert "UCX_TLS" in result
+
+    @patch.dict(os.environ, {"HF_TOKEN": "secret"}, clear=False)
+    def test_hf_token_via_prefix(self):
+        result = get_env_vars_to_copy()
+        assert "HF_TOKEN" in result
+
+    @patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": "secret"}, clear=False)
+    def test_hugging_face_prefix(self):
+        result = get_env_vars_to_copy()
+        assert "HUGGING_FACE_HUB_TOKEN" in result
+
+
+# ---------------------------------------------------------------------------
+# Default extra vars
+# ---------------------------------------------------------------------------
+
+
+class TestDefaultExtraVars:
+    """Individual vars listed in VLLM_RAY_EXTRA_ENV_VARS_TO_COPY's default."""
+
+    def test_pythonhashseed_in_result(self):
+        """PYTHONHASHSEED should always be in the result set (as a name to
+        copy) regardless of whether it is actually set in os.environ."""
+        result = get_env_vars_to_copy()
+        assert "PYTHONHASHSEED" in result
+
+
+# ---------------------------------------------------------------------------
+# User-supplied extensions
+# ---------------------------------------------------------------------------
+
+
+class TestUserExtensions:
+    """Users can add prefixes and extra vars at deploy time."""
+
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
+            "MYLIB_FOO": "bar",
+        },
+        clear=False,
+    )
+    def test_user_prefix(self):
+        """User-supplied prefixes are additive — built-in defaults are kept."""
+        result = get_env_vars_to_copy()
+        assert "MYLIB_FOO" in result
+
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_SECRET",
+            "MY_SECRET": "val",
+        },
+        clear=False,
+    )
+    def test_user_extra_var(self):
+        """User-supplied extras are additive — PYTHONHASHSEED still included."""
+        result = get_env_vars_to_copy()
+        assert "MY_SECRET" in result
+        assert "PYTHONHASHSEED" in result
+
+
+# ---------------------------------------------------------------------------
+# Exclusion
+# ---------------------------------------------------------------------------
+
+
+class TestExclusion:
+    """exclude_vars and RAY_NON_CARRY_OVER_ENV_VARS take precedence."""
+
+    @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}, clear=False)
+    def test_exclude_vars(self):
+        result = get_env_vars_to_copy(exclude_vars={"CUDA_VISIBLE_DEVICES"})
+        assert "CUDA_VISIBLE_DEVICES" not in result
+
+    @patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
+    @patch(
+        "vllm.ray.ray_env.RAY_NON_CARRY_OVER_ENV_VARS",
+        {"LMCACHE_LOCAL_CPU"},
+    )
+    def test_non_carry_over_blacklist(self):
+        result = get_env_vars_to_copy()
+        assert "LMCACHE_LOCAL_CPU" not in result
+
+
+# ---------------------------------------------------------------------------
+# additional_vars (platform extension point)
+# ---------------------------------------------------------------------------
+
+
+class TestAdditionalVars:
+    """The additional_vars parameter supports platform-specific vars."""
+
+    @patch.dict(os.environ, {"CUSTOM_PLATFORM_VAR": "1"}, clear=False)
+    def test_additional_vars_passthrough(self):
+        result = get_env_vars_to_copy(additional_vars={"CUSTOM_PLATFORM_VAR"})
+        assert "CUSTOM_PLATFORM_VAR" in result
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeCases:
+    """Prefix matching should be strict (startswith, not contains)."""
+
+    @patch.dict(os.environ, {"LMCACH_TYPO": "1"}, clear=False)
+    def test_prefix_no_partial_match(self):
+        """'LMCACH_' does not match the 'LMCACHE_' prefix."""
+        result = get_env_vars_to_copy()
+        assert "LMCACH_TYPO" not in result
+
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": " MYLIB_ , OTHER_ ",
+            "MYLIB_FOO": "1",
+            "OTHER_BAR": "2",
+        },
+        clear=False,
+    )
+    def test_csv_whitespace_handling(self):
+        """Whitespace around commas and tokens should be stripped."""
+        result = get_env_vars_to_copy()
+        # These only match if the padded tokens were stripped to bare prefixes.
+        assert "MYLIB_FOO" in result
+        assert "OTHER_BAR" in result
+
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
+            "LMCACHE_BACKEND": "cpu",
+            "NCCL_DEBUG": "INFO",
+            "MYLIB_FOO": "bar",
+        },
+        clear=False,
+    )
+    def test_user_prefix_additive(self):
+        """Setting VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY does NOT drop defaults."""
+        result = get_env_vars_to_copy()
+        # Built-in defaults still present
+        assert "LMCACHE_BACKEND" in result
+        assert "NCCL_DEBUG" in result
+        # User addition also present
+        assert "MYLIB_FOO" in result
+
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_FLAG",
+            "PYTHONHASHSEED": "42",
+            "MY_FLAG": "1",
+        },
+        clear=False,
+    )
+    def test_user_extra_additive(self):
+        """Setting VLLM_RAY_EXTRA_ENV_VARS_TO_COPY does NOT drop defaults."""
+        result = get_env_vars_to_copy()
+        # Built-in default still present
+        assert "PYTHONHASHSEED" in result
+        # User addition also present
+        assert "MY_FLAG" in result
diff --git a/vllm/envs.py b/vllm/envs.py
index 15fa5fc3e..b32683ecb 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -139,6 +139,8 @@ if TYPE_CHECKING:
     VLLM_ENABLE_MOE_DP_CHUNK: bool = True
     VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
     VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
+    VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY: str = ""
+    VLLM_RAY_EXTRA_ENV_VARS_TO_COPY: str = ""
     VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
     VLLM_MARLIN_INPUT_DTYPE: Literal["int8", "fp8"] | None = None
     VLLM_MXFP4_USE_MARLIN: bool | None = None
@@ -1090,6 +1092,19 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_RAY_DP_PACK_STRATEGY": lambda: os.getenv(
         "VLLM_RAY_DP_PACK_STRATEGY", "strict"
     ),
+    # Comma-separated *additional* prefixes of env vars to copy from the
+    # driver to Ray workers. These are merged with the built-in defaults
+    # defined in ``vllm.ray.ray_env`` (VLLM_, etc.). Example: "MYLIB_,OTHER_"
+    "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": lambda: os.getenv(
+        "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY", ""
+    ),
+    # Comma-separated *additional* individual env var names to copy from
+    # the driver to Ray workers. Merged with the built-in defaults
+    # defined in ``vllm.ray.ray_env`` (PYTHONHASHSEED).
+    # Example: "MY_SECRET,MY_FLAG"
+    "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": lambda: os.getenv(
+        "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY", ""
+    ),
     # Whether to use S3 path for model loading in CI via RunAI Streamer
     "VLLM_CI_USE_S3": lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",
     # Use model_redirect to redirect the model name to a local folder.
diff --git a/vllm/ray/ray_env.py b/vllm/ray/ray_env.py
index 85623cfe5..5ecca742c 100644
--- a/vllm/ray/ray_env.py
+++ b/vllm/ray/ray_env.py
@@ -10,8 +10,7 @@ logger = init_logger(__name__)
 
 CONFIG_HOME = envs.VLLM_CONFIG_ROOT
 
-# This file contains a list of env vars that should not be copied
-# from the driver to the Ray workers.
+# Env vars that should NOT be copied from the driver to Ray workers.
 RAY_NON_CARRY_OVER_ENV_VARS_FILE = os.path.join(
     CONFIG_HOME, "ray_non_carry_over_env_vars.json"
 )
@@ -29,51 +28,89 @@ except json.JSONDecodeError:
     )
     RAY_NON_CARRY_OVER_ENV_VARS = set()
 
+# ---------------------------------------------------------------------------
+# Built-in defaults for env var propagation.
+# Users can add more via VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY and
+# VLLM_RAY_EXTRA_ENV_VARS_TO_COPY (additive, not replacing).
+# ---------------------------------------------------------------------------
+DEFAULT_ENV_VAR_PREFIXES: set[str] = {
+    "VLLM_",
+    "LMCACHE_",
+    "NCCL_",
+    "UCX_",
+    "HF_",
+    "HUGGING_FACE_",
+}
+
+DEFAULT_EXTRA_ENV_VARS: set[str] = {
+    "PYTHONHASHSEED",
+}
+
+
+def _parse_csv(value: str) -> set[str]:
+    """Split a comma-separated string into a set of stripped, non-empty tokens."""
+    return {tok.strip() for tok in value.split(",") if tok.strip()}
+
 
 def get_env_vars_to_copy(
     exclude_vars: set[str] | None = None,
     additional_vars: set[str] | None = None,
     destination: str | None = None,
 ) -> set[str]:
-    """
-    Get the environment variables to copy to downstream Ray actors.
+    """Return the env var names to copy from the driver to Ray actors.
 
-    Example use cases:
-    - Copy environment variables from RayDistributedExecutor to Ray workers.
-    - Copy environment variables from RayDPClient to Ray DPEngineCoreActor.
+    The result is the union of:
+
+    1. Env vars registered in ``vllm.envs.environment_variables``.
+    2. Env vars in ``os.environ`` matching a prefix in
+       ``DEFAULT_ENV_VAR_PREFIXES`` + ``VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY``.
+    3. Individual names in ``DEFAULT_EXTRA_ENV_VARS`` +
+       ``VLLM_RAY_EXTRA_ENV_VARS_TO_COPY``.
+    4. Caller-supplied *additional_vars* (e.g. platform-specific).
+
+    Minus any names in *exclude_vars* or ``RAY_NON_CARRY_OVER_ENV_VARS``.
 
     Args:
-        exclude_vars: A set of vllm defined environment variables to exclude
-            from copying.
-        additional_vars: A set of additional environment variables to copy.
-            If a variable is in both exclude_vars and additional_vars, it will
-            be excluded.
-        destination: The destination of the environment variables.
-    Returns:
-        A set of environment variables to copy.
+        exclude_vars: Env vars to exclude (e.g. worker-specific ones).
+        additional_vars: Extra individual env var names to copy. Useful
+            for caller-specific vars (e.g. platform env vars).
+        destination: Label used in log messages only.
     """
-    exclude_vars = exclude_vars or set()
-    additional_vars = additional_vars or set()
+    exclude = (exclude_vars or set()) | RAY_NON_CARRY_OVER_ENV_VARS
 
-    env_vars_to_copy = {
-        v
-        for v in set(envs.environment_variables).union(additional_vars)
-        if v not in exclude_vars and v not in RAY_NON_CARRY_OVER_ENV_VARS
-    }
-
-    to_destination = " to " + destination if destination is not None else ""
-
-    logger.info(
-        "RAY_NON_CARRY_OVER_ENV_VARS from config: %s", RAY_NON_CARRY_OVER_ENV_VARS
+    # -- prefixes (built-in + user-supplied, additive) ----------------------
+    prefixes = DEFAULT_ENV_VAR_PREFIXES | _parse_csv(
+        envs.VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY
     )
+
+    # -- collect env var names ----------------------------------------------
+    # 1. vLLM's registered env vars
+    result = set(envs.environment_variables)
+    # 2. Prefix-matched vars present in the current environment
+    result |= {name for name in os.environ if any(name.startswith(p) for p in prefixes)}
+    # 3. Individual extra vars (built-in + user-supplied, additive)
+    result |= DEFAULT_EXTRA_ENV_VARS | _parse_csv(envs.VLLM_RAY_EXTRA_ENV_VARS_TO_COPY)
+    # 4. Caller-supplied extra vars (e.g. platform-specific)
+    result |= additional_vars or set()
+    # 5. Exclude worker-specific and user-blacklisted vars
+    result -= exclude
+
+    # -- logging ------------------------------------------------------------
+    dest = f" to {destination}" if destination else ""
+    logger.info("Env var prefixes to copy: %s", sorted(prefixes))
     logger.info(
         "Copying the following environment variables%s: %s",
-        to_destination,
-        [v for v in env_vars_to_copy if v in os.environ],
+        dest,
+        sorted(v for v in result if v in os.environ),
     )
+    if RAY_NON_CARRY_OVER_ENV_VARS:
+        logger.info(
+            "RAY_NON_CARRY_OVER_ENV_VARS from config: %s",
+            RAY_NON_CARRY_OVER_ENV_VARS,
+        )
     logger.info(
-        "If certain env vars should NOT be copied, add them to %s file",
+        "To exclude env vars from copying, add them to %s",
         RAY_NON_CARRY_OVER_ENV_VARS_FILE,
     )
 
-    return env_vars_to_copy
+    return result
diff --git a/vllm/v1/executor/ray_executor.py b/vllm/v1/executor/ray_executor.py
index a1f69c478..ad51526ae 100644
--- a/vllm/v1/executor/ray_executor.py
+++ b/vllm/v1/executor/ray_executor.py
@@ -73,9 +73,6 @@ class RayDistributedExecutor(Executor):
         "ROCR_VISIBLE_DEVICES",
     }
 
-    # These non-vLLM env vars are copied from the driver to workers
-    ADDITIONAL_ENV_VARS = {"HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"}
-
     uses_ray: bool = True
     supports_pp: bool = True
 
@@ -339,9 +336,7 @@ class RayDistributedExecutor(Executor):
         # Environment variables to copy from driver to workers
         env_vars_to_copy = get_env_vars_to_copy(
             exclude_vars=self.WORKER_SPECIFIC_ENV_VARS,
-            additional_vars=set(current_platform.additional_env_vars).union(
-                self.ADDITIONAL_ENV_VARS
-            ),
+            additional_vars=set(current_platform.additional_env_vars),
             destination="workers",
         )
 