[Feat][Executor] Introduce RayExecutorV2 (#36836)

Signed-off-by: Jeffrey Wang <jeffreywang@anyscale.com>
This commit is contained in:
Jeffrey Wang
2026-04-01 14:34:29 -07:00
committed by GitHub
parent cb268e4e55
commit de5e6c44c6
14 changed files with 1603 additions and 30 deletions

View File

@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import random
import msgspec
@@ -166,3 +167,31 @@ class MockSubscriber:
self.sub.close()
for replay in self.replay_sockets:
replay.close()
@pytest.fixture
def enable_ray_v2_backend():
    """Enable the Ray V2 executor backend for the duration of one test.

    Saves the previous values of the two env vars, switches the V2 backend
    on (and V1 multiprocessing off), guarantees Ray is stopped both before
    and after the test body, then restores the original environment.
    """
    import ray

    touched = ("VLLM_USE_RAY_V2_EXECUTOR_BACKEND", "VLLM_ENABLE_V1_MULTIPROCESSING")
    previous = {key: os.environ.get(key) for key in touched}
    os.environ["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
    if ray.is_initialized():
        ray.shutdown()
    try:
        yield
    finally:
        if ray.is_initialized():
            ray.shutdown()
        # Restore the environment exactly: put back saved values, drop
        # keys that were unset before the test.
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

View File

@@ -0,0 +1,119 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Multi-node integration test for MessageQueue TCP fallback.
Verifies that when writer and readers span separate nodes (Docker containers
with isolated /dev/shm), `create_from_process_group` correctly detects
cross-node ranks via `in_the_same_node_as()` and falls back to ZMQ TCP
transport — and that data actually arrives.
"""
import numpy as np
import torch.distributed as dist
from vllm.distributed.device_communicators.shm_broadcast import MessageQueue
from vllm.distributed.parallel_state import in_the_same_node_as
def _verify_cross_node_detection(rank, world_size):
    """Check that ``in_the_same_node_as`` reports at least one remote rank.

    Returns the per-rank locality list so the caller can check transport
    selection against it.
    """
    status = in_the_same_node_as(dist.group.WORLD, source_rank=0)
    local_count = sum(status)
    print(
        f"[Rank {rank}] in_the_same_node_as(source=0): {status} "
        f"(local={local_count}/{world_size})"
    )
    # With 2 Docker containers (1 proc each), rank 0 and rank 1
    # should be on different nodes.
    assert local_count < world_size, (
        f"Expected cross-node ranks but all {world_size} ranks appear local."
    )
    return status


def _verify_transport_selection(mq, rank, writer_rank, status):
    """Check each rank got the expected transport (shm local vs TCP remote)."""
    if rank == writer_rank:
        print(
            f"[Rank {rank}] Writer: n_local_reader={mq.n_local_reader}, "
            f"n_remote_reader={mq.n_remote_reader}"
        )
        assert mq.n_remote_reader > 0, (
            "Writer should have at least 1 remote (TCP) reader in a multi-node setup."
        )
    elif status[rank]:
        assert mq._is_local_reader, (
            f"Rank {rank} is on the same node as writer but is not a local reader."
        )
        print(f"[Rank {rank}] Reader: local (shared memory)")
    else:
        assert mq._is_remote_reader, (
            f"Rank {rank} is on a different node but is not a remote (TCP) reader."
        )
        print(f"[Rank {rank}] Reader: remote (TCP)")


def _test_simple_object(mq, rank, writer_rank):
    """Round-trip a small Python object through the queue."""
    dist.barrier()
    if rank == writer_rank:
        mq.enqueue("hello_from_node0")
    else:
        msg = mq.dequeue(timeout=10)
        assert msg == "hello_from_node0"
    dist.barrier()
    print(f"[Rank {rank}] Simple object test passed")


def _test_numpy_arrays(mq, rank, writer_rank):
    """Round-trip 100 random-sized numpy arrays through the queue."""
    # Fixed seed: every rank computes the identical expected arrays.
    np.random.seed(42)
    arrays = [
        np.random.randint(0, 100, size=np.random.randint(100, 5000)) for _ in range(100)
    ]
    dist.barrier()
    if rank == writer_rank:
        for arr in arrays:
            mq.enqueue(arr)
    else:
        for i, expected in enumerate(arrays):
            received = mq.dequeue(timeout=10)
            assert np.array_equal(expected, received), (
                f"Array mismatch at index {i}: "
                f"expected shape {expected.shape}, got shape {received.shape}"
            )
    dist.barrier()
    print(f"[Rank {rank}] Numpy array test passed")


def _test_large_payload(mq, rank, writer_rank):
    """Round-trip a payload larger than max_chunk_bytes (forces chunking)."""
    dist.barrier()
    big_array = np.zeros(200_000, dtype=np.int64)  # ~1.6 MiB > 1 MiB chunk
    if rank == writer_rank:
        mq.enqueue(big_array)
    else:
        received = mq.dequeue(timeout=10)
        assert np.array_equal(big_array, received)
    dist.barrier()
    print(f"[Rank {rank}] Large payload test passed")


def main():
    """Run all multi-node MessageQueue TCP-fallback checks under gloo."""
    dist.init_process_group(backend="gloo")
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    assert world_size >= 2, (
        f"Need at least 2 ranks across nodes, got world_size={world_size}"
    )
    # Verify that in_the_same_node_as detects cross-node correctly
    status = _verify_cross_node_detection(rank, world_size)
    # Create MessageQueue
    writer_rank = 0
    mq = MessageQueue.create_from_process_group(
        dist.group.WORLD,
        max_chunk_bytes=1024 * 1024,  # 1 MiB
        max_chunks=10,
        writer_rank=writer_rank,
    )
    _verify_transport_selection(mq, rank, writer_rank, status)
    _test_simple_object(mq, rank, writer_rank)
    _test_numpy_arrays(mq, rank, writer_rank)
    _test_large_payload(mq, rank, writer_rank)
    # Done -- cleanup
    dist.barrier()
    print(f"[Rank {rank}] All MessageQueue TCP multi-node tests passed!")
    dist.destroy_process_group()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,345 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Integration tests for RayExecutorV2 at the executor level.
Validates executor initialization, placement group support, RPC calls,
and distributed execution with various TP/PP configurations.
"""
import gc
import threading
from unittest.mock import patch
import pytest
import ray
from vllm import LLM
from vllm.config import VllmConfig
from vllm.engine.arg_utils import EngineArgs
from vllm.v1.executor.ray_executor_v2 import RayExecutorV2
# Every test in this module runs with the Ray V2 executor backend enabled.
pytestmark = pytest.mark.usefixtures("enable_ray_v2_backend")
# Small model so the tests load quickly.
MODEL = "facebook/opt-125m"
def create_vllm_config(
    tensor_parallel_size: int = 1,
    pipeline_parallel_size: int = 1,
    max_model_len: int = 256,
    gpu_memory_utilization: float = 0.3,
    placement_group=None,
) -> VllmConfig:
    """Build a VllmConfig for the test model with the given parallel layout.

    When *placement_group* is provided it is attached to the parallel
    config so the executor uses the externally created placement group.
    """
    args = EngineArgs(
        model=MODEL,
        tensor_parallel_size=tensor_parallel_size,
        pipeline_parallel_size=pipeline_parallel_size,
        max_model_len=max_model_len,
        gpu_memory_utilization=gpu_memory_utilization,
        distributed_executor_backend="ray",
        enforce_eager=True,
    )
    config = args.create_engine_config()
    if placement_group is not None:
        config.parallel_config.placement_group = placement_group
    return config
def ensure_ray_initialized():
    """Start Ray if it is not already running (idempotent)."""
    if ray.is_initialized():
        return
    ray.init(ignore_reinit_error=True)
@pytest.fixture
def create_placement_group(request):
    """Create a PACK placement group with ``request.param`` single-GPU
    bundles, and remove it after the test."""
    ensure_ray_initialized()
    bundle_count = request.param
    pg = ray.util.placement_group(
        [{"GPU": 1, "CPU": 1}] * bundle_count, strategy="PACK"
    )
    ray.get(pg.ready())
    yield pg
    ray.util.remove_placement_group(pg)
@pytest.fixture
def executor(request):
    """Build a RayExecutorV2 from the parametrized VllmConfig and make sure
    it is shut down after the test."""
    instance = RayExecutorV2(vllm_config=request.param)
    yield instance
    instance.shutdown()
def assert_executor(executor, tp_size, pp_size):
    """Shared sanity checks for a freshly initialized RayExecutorV2."""
    expected_world = tp_size * pp_size
    assert executor.world_size == expected_world
    assert len(executor.ray_worker_handles) == expected_world
    assert len(executor.response_mqs) == expected_world
    # Output rank is the first worker of the last pipeline stage.
    assert executor._get_output_rank() == (pp_size - 1) * tp_size
    if pp_size > 1:
        assert executor.max_concurrent_batches == pp_size
    executor.check_health()
    assert not executor.is_failed
    # Ranks must be exactly 0..world_size-1 with no gaps or duplicates.
    assert sorted(h.rank for h in executor.ray_worker_handles) == list(
        range(expected_world)
    )
    assert all(h.node_id is not None for h in executor.ray_worker_handles)
@pytest.mark.parametrize("tp_size, pp_size", [(1, 1), (2, 1), (4, 1), (2, 2)])
def test_ray_v2_executor(tp_size, pp_size):
    """Validate RayExecutorV2 with various TP/PP configs."""
    config = create_vllm_config(
        tensor_parallel_size=tp_size,
        pipeline_parallel_size=pp_size,
    )
    ex = RayExecutorV2(vllm_config=config)
    try:
        assert_executor(ex, tp_size, pp_size)
    finally:
        ex.shutdown()
@pytest.mark.parametrize(
    "tp_size, pp_size, create_placement_group",
    [(2, 1, 2), (4, 1, 4), (2, 2, 4)],
    indirect=["create_placement_group"],
)
def test_ray_v2_executor_pg(tp_size, pp_size, create_placement_group):
    """Validate RayExecutorV2 with various TP/PP configs using external PG."""
    config = create_vllm_config(
        tensor_parallel_size=tp_size,
        pipeline_parallel_size=pp_size,
        placement_group=create_placement_group,
    )
    ex = RayExecutorV2(vllm_config=config)
    try:
        assert_executor(ex, tp_size, pp_size)
    finally:
        ex.shutdown()
@pytest.mark.parametrize(
    "executor",
    [create_vllm_config(tensor_parallel_size=2)],
    indirect=True,
)
def test_ray_v2_executor_failure_callback(executor):
    """Validate failure callback registration."""
    invocations = []
    callback = lambda: invocations.append(True)  # noqa: E731
    # Healthy executor: the callback is stored but not fired.
    executor.register_failure_callback(callback)
    assert not invocations
    # Registering on an already-failed executor fires immediately.
    executor.is_failed = True
    executor.register_failure_callback(callback)
    assert invocations
@pytest.mark.parametrize(
    "executor",
    [create_vllm_config(tensor_parallel_size=2)],
    indirect=True,
)
def test_ray_v2_executor_collective_rpc(executor):
    """Validate collective RPC calls through MessageQueue."""
    executor.check_health()
    assert not executor.is_failed
    # The broadcast queue is the RPC transport; it must exist post-init.
    assert executor.rpc_broadcast_mq is not None
@pytest.mark.parametrize(
    "executor",
    [create_vllm_config(tensor_parallel_size=2)],
    indirect=True,
)
def test_ray_v2_executor_driver_node_rank_0(executor):
    """Validate that driver node workers get the lowest ranks."""
    driver_node = ray.get_runtime_context().get_node_id()
    # Single-node run: every worker lives on the driver's node.
    assert all(h.node_id == driver_node for h in executor.ray_worker_handles)
    rank0 = next(h for h in executor.ray_worker_handles if h.rank == 0)
    assert rank0.node_id == driver_node
@pytest.mark.parametrize(
    "executor",
    [create_vllm_config(tensor_parallel_size=2)],
    indirect=True,
)
def test_ray_v2_executor_worker_death(executor):
    """Validate executor detects worker death via ray.wait()."""
    failure_seen = threading.Event()
    executor.register_failure_callback(failure_seen.set)
    assert not executor.is_failed
    # Kill one worker actor externally
    ray.kill(executor.ray_worker_handles[1].actor, no_restart=True)
    # Monitor thread should detect the death and invoke callback
    assert failure_seen.wait(timeout=30)
    assert executor.is_failed
    assert executor.shutting_down
def test_ray_v2_executor_shutdown():
    """Validate graceful shutdown: ray.kill() terminates all worker actors."""
    ex = RayExecutorV2(vllm_config=create_vllm_config(tensor_parallel_size=2))
    assert ex.rpc_broadcast_mq is not None
    assert len(ex.response_mqs) == ex.world_size
    worker_actors = [handle.actor for handle in ex.ray_worker_handles]
    ex.shutdown()
    # Every worker actor must now be dead: any remote call raises.
    for worker_actor in worker_actors:
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(worker_actor.wait_for_init.remote(), timeout=5)
    # Shutdown must also release the message queues.
    assert ex.rpc_broadcast_mq is None
    assert len(ex.response_mqs) == 0
@pytest.mark.parametrize(
    "executor",
    [create_vllm_config(tensor_parallel_size=2)],
    indirect=True,
)
def test_ray_v2_run_refs_stored_for_monitoring(executor):
    """Validate worker handles store run_ref for monitoring."""
    for handle in executor.ray_worker_handles:
        assert handle.run_ref is not None
        # A live worker's run_ref must not have resolved yet.
        done, _ = ray.wait([handle.run_ref], timeout=0)
        assert not done, "run_ref should be pending"
@pytest.mark.parametrize("tp_size, pp_size", [(2, 1), (2, 2)])
def test_ray_v2_single_node_generation(tp_size, pp_size):
    """End-to-end LLM generation with RayExecutorV2."""
    llm = LLM(
        model=MODEL,
        tensor_parallel_size=tp_size,
        pipeline_parallel_size=pp_size,
        distributed_executor_backend="ray",
        enforce_eager=True,
        max_model_len=256,
        gpu_memory_utilization=0.3,
    )
    try:
        prompts = [
            "Hello, my name is",
            "The capital of France is",
            "The future of AI is",
        ]
        results = llm.generate(prompts)
        assert len(results) == len(prompts)
        # Every prompt must produce at least one non-empty completion.
        for result in results:
            assert len(result.outputs) > 0
            assert len(result.outputs[0].text) > 0
    finally:
        llm.llm_engine.model_executor.shutdown()
        del llm
        gc.collect()
@pytest.mark.parametrize(
    "bundle_indices, expected_bundle_ids, create_placement_group",
    [("2,3", [2, 3], 4), ("3,2", [3, 2], 4)],
    indirect=["create_placement_group"],
)
def test_ray_v2_bundle_indices_env(
    bundle_indices, expected_bundle_ids, create_placement_group, monkeypatch
):
    """Validate explicit VLLM_RAY_BUNDLE_INDICES bundle placement."""
    monkeypatch.setenv("VLLM_RAY_BUNDLE_INDICES", bundle_indices)
    config = create_vllm_config(
        tensor_parallel_size=2,
        placement_group=create_placement_group,
    )
    ex = RayExecutorV2(vllm_config=config)
    try:
        # Workers ordered by rank must sit in exactly the requested bundles.
        by_rank = sorted(ex.ray_worker_handles, key=lambda h: h.rank)
        assert [h.bundle_id_idx for h in by_rank] == expected_bundle_ids
        assert_executor(ex, tp_size=2, pp_size=1)
    finally:
        ex.shutdown()
@pytest.mark.parametrize(
    "bundle_indices, expected_error, create_placement_group",
    [
        ("1,1", "cannot have duplicate values,", 4),
        ("0,1,2", "must have the same size", 4),
    ],
    indirect=["create_placement_group"],
)
def test_ray_v2_invalid_bundle_indices(
    bundle_indices, expected_error, create_placement_group, monkeypatch
):
    """Validate invalid bundle indices are rejected."""
    monkeypatch.setenv("VLLM_RAY_BUNDLE_INDICES", bundle_indices)
    config = create_vllm_config(
        tensor_parallel_size=2, placement_group=create_placement_group
    )
    # Duplicate or wrong-length index lists must fail fast at construction.
    with pytest.raises(AssertionError, match=expected_error):
        RayExecutorV2(vllm_config=config)
@pytest.mark.parametrize("tp_size, pp_size", [(2, 1), (2, 2)])
def test_ray_v2_single_node_generation_with_pg(tp_size, pp_size):
    """E2E LLM generation with a user-provided placement group."""
    ensure_ray_initialized()
    bundles = [{"GPU": 1, "CPU": 1} for _ in range(tp_size * pp_size)]
    pg = ray.util.placement_group(bundles, strategy="PACK")
    ray.get(pg.ready())
    # Bind before the try: if LLM() raises, the finally below would
    # otherwise hit a NameError on `llm` and mask the real failure.
    llm = None
    try:
        with patch.object(ray.util, "get_current_placement_group", return_value=pg):
            llm = LLM(
                model=MODEL,
                tensor_parallel_size=tp_size,
                pipeline_parallel_size=pp_size,
                distributed_executor_backend="ray",
                enforce_eager=True,
                max_model_len=256,
                gpu_memory_utilization=0.3,
            )
            prompts = [
                "Hello, my name is",
                "The capital of France is",
                "The future of AI is",
            ]
            outputs = llm.generate(prompts)
            assert len(outputs) == len(prompts)
            for output in outputs:
                assert len(output.outputs) > 0
                assert len(output.outputs[0].text) > 0
    finally:
        # Only clean up the engine if construction actually succeeded.
        if llm is not None:
            llm.llm_engine.model_executor.shutdown()
            del llm
        gc.collect()

View File

@@ -0,0 +1,209 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Orchestration-level integration tests for RayExecutorV2.
"""
import gc
import os
import pathlib
import pytest
import ray
# Every test in this module runs with the Ray V2 executor backend enabled.
pytestmark = pytest.mark.usefixtures("enable_ray_v2_backend")
# Small model so the tests load quickly.
MODEL = "facebook/opt-125m"
def _get_env_var(worker, name):
    """Return env var *name* in the calling process, or None if unset.

    The unused *worker* parameter matches the signature collective_rpc
    passes when invoking this on each worker.
    """
    return os.environ.get(name)
def _ray_init():
    """Start Ray with the project root on workers' PYTHONPATH.

    Without this, workers cannot unpickle actor classes defined in the
    ``tests`` package, causing FunctionActorManager to fall back to
    TemporaryActor which drops async method signatures.
    """
    repo_root = pathlib.Path(__file__).resolve().parents[2]
    ray.init(
        ignore_reinit_error=True,
        runtime_env={"env_vars": {"PYTHONPATH": str(repo_root)}},
    )
@pytest.fixture
def ray_init():
    """Fixture wrapper around _ray_init() for tests that request it."""
    _ray_init()
class _AsyncLLMActor:
    """Ray actor that hosts an AsyncLLM engine configured for RayExecutorV2.

    Used by the orchestration tests to run one engine replica per actor,
    generate text, and inspect env vars inside the engine's workers.
    """

    def start(self, pg, bundle_indices=None, ray_runtime_env=None):
        """Build the AsyncLLM engine on placement group *pg*.

        Env vars are set BEFORE the vllm imports so the V2 backend and
        bundle selection take effect during engine construction.
        """
        os.environ["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
        # Needed so collective_rpc can pickle _get_env_var over the
        # AsyncLLM -> EngineCore ZMQ boundary.
        os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
        if bundle_indices is not None:
            os.environ["VLLM_RAY_BUNDLE_INDICES"] = bundle_indices
        else:
            # Make sure a value leaked from a previous start() is cleared.
            os.environ.pop("VLLM_RAY_BUNDLE_INDICES", None)
        from vllm.engine.arg_utils import AsyncEngineArgs
        from vllm.v1.engine.async_llm import AsyncLLM
        from vllm.v1.executor.abstract import Executor
        engine_args = AsyncEngineArgs(
            model=MODEL,
            tensor_parallel_size=2,
            distributed_executor_backend="ray",
            enforce_eager=True,
            max_model_len=256,
            gpu_memory_utilization=0.8,
        )
        vllm_config = engine_args.create_engine_config()
        vllm_config.parallel_config.placement_group = pg
        if ray_runtime_env is not None:
            vllm_config.parallel_config.ray_runtime_env = ray_runtime_env
        executor_class = Executor.get_class(vllm_config)
        self.engine = AsyncLLM(
            vllm_config=vllm_config,
            executor_class=executor_class,
            log_stats=False,
            log_requests=False,
        )

    async def generate(self, prompt):
        """Generate up to 16 tokens for *prompt*; return the completion text."""
        from vllm.sampling_params import SamplingParams
        params = SamplingParams(max_tokens=16)
        result = None
        # Drain the stream; the last yielded output holds the final text.
        async for output in self.engine.generate(
            prompt, params, request_id="test_request_id"
        ):
            result = output
        assert result is not None
        return result.outputs[0].text

    async def generate_and_get_worker_envs(self, prompt, env_names):
        """Generate for *prompt*, then read *env_names* on every worker.

        Returns (completion_text, {name: [value per worker]}).
        """
        from vllm.sampling_params import SamplingParams
        params = SamplingParams(max_tokens=16)
        result = None
        async for output in self.engine.generate(
            prompt, params, request_id="test_request_id"
        ):
            result = output
        assert result is not None
        text = result.outputs[0].text
        env_results = {}
        for name in env_names:
            # One RPC per env var; each returns a list with one value
            # per worker process.
            vals = await self.engine.collective_rpc(
                _get_env_var, timeout=10, args=(name,)
            )
            env_results[name] = vals
        return text, env_results

    def shutdown(self):
        """Tear down the engine if start() ever succeeded."""
        if engine := getattr(self, "engine", None):
            engine.shutdown()
            del self.engine
            gc.collect()
AsyncLLMActor = ray.remote(num_cpus=0, max_concurrency=1)(_AsyncLLMActor)
def test_multi_replicas(ray_init):
    """Two independent AsyncLLM replicas, each on its own placement group."""
    groups = [
        ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
        for _ in range(2)
    ]
    ray.get([pg.ready() for pg in groups])
    actors = [AsyncLLMActor.remote() for _ in range(2)]
    for actor, pg in zip(actors, groups):
        ray.get(actor.start.remote(pg))
    texts = ray.get([actor.generate.remote("Hello world") for actor in actors])
    # Both replicas must produce a non-empty completion.
    for text in texts:
        assert len(text) > 0
def test_multi_replicas_with_bundle_indices(ray_init):
    """Two replicas sharing one 4-GPU PG via explicit bundle indices."""
    pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 4, strategy="PACK")
    ray.get(pg.ready())
    first = AsyncLLMActor.remote()
    second = AsyncLLMActor.remote()
    # Disjoint (and deliberately unordered) bundle assignments.
    ray.get(first.start.remote(pg, bundle_indices="2,1"))
    ray.get(second.start.remote(pg, bundle_indices="0,3"))
    texts = ray.get(
        [
            first.generate.remote("Hello world"),
            second.generate.remote("Hello world"),
        ]
    )
    for text in texts:
        assert len(text) > 0
def test_env_var_and_runtime_env_propagation():
    """
    Verify env vars (NCCL_, HF_) and parallel_config.ray_runtime_env
    propagate to RayWorkerProc actors.
    """
    sentinels = {
        "NCCL_DEBUG": "INFO",
        "HF_TOKEN": "test_sentinel_token",
    }
    os.environ.update(sentinels)
    try:
        # Called directly (not via the ray_init fixture) because sentinel
        # env vars must be in os.environ before ray.init() so that Ray
        # worker processes inherit them.
        _ray_init()
        pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
        ray.get(pg.ready())
        # Include the project root so that RayWorkerProc actors can
        # unpickle _get_env_var.
        repo_root = pathlib.Path(__file__).resolve().parents[2]
        runtime_env = {
            "env_vars": {
                "RAY_RUNTIME_ENV_TEST": "ray_runtime_env",
                "PYTHONPATH": str(repo_root),
            },
        }
        actor = AsyncLLMActor.remote()
        ray.get(actor.start.remote(pg, ray_runtime_env=runtime_env))
        query_names = list(sentinels) + ["RAY_RUNTIME_ENV_TEST"]
        text, worker_envs = ray.get(
            actor.generate_and_get_worker_envs.remote("Hello world", query_names)
        )
        assert len(text) > 0
        # Every worker must see both the inherited sentinels and the
        # runtime_env-injected variable.
        expected = dict(sentinels)
        expected["RAY_RUNTIME_ENV_TEST"] = "ray_runtime_env"
        for name, want in expected.items():
            assert all(value == want for value in worker_envs[name])
    finally:
        for key in sentinels:
            os.environ.pop(key, None)