diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 0178d23b7..3409f04a1 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -15,8 +15,6 @@ FROM ${BASE_IMAGE} AS base
 
 ARG ARG_PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
-ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
-ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
 
 # Install some basic utilities
 RUN apt-get update -q -y && apt-get install -q -y \
diff --git a/tests/config/test_config_generation.py b/tests/config/test_config_generation.py
index 61c3df0a2..23ceb920c 100644
--- a/tests/config/test_config_generation.py
+++ b/tests/config/test_config_generation.py
@@ -60,6 +60,11 @@ def test_ray_runtime_env(monkeypatch: pytest.MonkeyPatch):
     runtime_env = {
         "env_vars": {
             "TEST_ENV_VAR": "test_value",
+            # In future Ray versions this will be the default: when a task or
+            # actor is created with num_gpus=None/0, Ray will not override the
+            # visible-devices env var. Overriding it would result in no GPUs
+            # being visible on a GPU machine.
+            "RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO": "0",
         },
     }
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index fbe791f8a..0c0bd7db3 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -102,6 +102,9 @@ class CudaPlatformBase(Platform):
     ray_device_key: str = "GPU"
     dist_backend: str = "nccl"
     device_control_env_var: str = "CUDA_VISIBLE_DEVICES"
+    ray_noset_device_env_vars: list[str] = [
+        "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES",
+    ]
 
     @property
     def supported_dtypes(self) -> list[torch.dtype]:
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index f0e7ee0da..c3b189e01 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -116,6 +116,11 @@ class Platform:
     # https://github.com/ray-project/ray/tree/master/python/ray/_private/accelerators  # noqa
     device_control_env_var: str = "VLLM_DEVICE_CONTROL_ENV_VAR_PLACEHOLDER"
 
+    # Environment variables that need to be set to "1" to prevent Ray from
+    # setting the visible devices, e.g.
+    # RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES.
+    ray_noset_device_env_vars: list[str] = []
+
     # The torch.compile backend for compiling simple and
     # standalone functions. The default value is "inductor" to keep
     # the same behavior as PyTorch.
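For reference, the effect of these NOSET variables can be demonstrated with a minimal Ray program. This is a hedged sketch assuming Ray is installed on a CUDA node; the task below is illustrative and not part of this change:

import os

import ray

# With the NOSET flag in the runtime_env, Ray leaves CUDA_VISIBLE_DEVICES
# untouched instead of narrowing it to the task's allocated GPUs.
ray.init(runtime_env={
    "env_vars": {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "1"},
})

@ray.remote(num_gpus=1)
def visible_devices() -> str:
    # Reports whatever the node environment already had,
    # not Ray's per-task override.
    return os.environ.get("CUDA_VISIBLE_DEVICES", "<unset>")

print(ray.get(visible_devices.remote()))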
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 6f4c235bb..2a9bd53e4 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -194,6 +194,11 @@ class RocmPlatform(Platform):
     dist_backend: str = "nccl"
     # ROCm shares the same device control env var as CUDA
     device_control_env_var: str = "CUDA_VISIBLE_DEVICES"
+    ray_noset_device_env_vars: list[str] = [
+        "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES",
+        "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES",
+        "RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES",
+    ]
 
     supported_quantization: list[str] = [
         "awq",
diff --git a/vllm/v1/executor/ray_executor.py b/vllm/v1/executor/ray_executor.py
index c8c6185b6..a1f69c478 100644
--- a/vllm/v1/executor/ray_executor.py
+++ b/vllm/v1/executor/ray_executor.py
@@ -69,6 +69,8 @@ class RayDistributedExecutor(Executor):
         "VLLM_HOST_PORT",
         "LOCAL_RANK",
         "CUDA_VISIBLE_DEVICES",
+        "HIP_VISIBLE_DEVICES",
+        "ROCR_VISIBLE_DEVICES",
     }
 
     # These non-vLLM env vars are copied from the driver to workers
@@ -146,6 +148,14 @@
 
         return ray_remote_kwargs
 
+    def _update_noset_device_env_vars(self, ray_remote_kwargs):
+        runtime_env = ray_remote_kwargs.setdefault("runtime_env", {})
+        env_vars = runtime_env.setdefault("env_vars", {})
+        env_vars.update(
+            {env_var: "1" for env_var in current_platform.ray_noset_device_env_vars}
+        )
+        return ray_remote_kwargs
+
     # child class could overwrite this to return actual env vars.
     def _get_env_vars_to_be_updated(self):
         return self._env_vars_for_all_workers
@@ -169,6 +179,11 @@
             ray_remote_kwargs
         )
 
+        # The way Ray actors are set up in vLLM, the visible devices are not
+        # set for the actors; they are left unset by Ray. Internally we index
+        # the right GPU with local_rank, similar to how mp mode works.
+        self._update_noset_device_env_vars(ray_remote_kwargs)
+
         # Create the workers.
         bundle_indices: list[int]
         if envs.VLLM_RAY_BUNDLE_INDICES:
@@ -303,6 +318,15 @@
         )
 
         # Set environment variables for the driver and workers.
+        # We set CUDA_VISIBLE_DEVICES to ALL GPUs on the node for each worker.
+        # This is needed because:
+        # 1. Ray's compiled DAG needs to find the allocated GPU in
+        #    CUDA_VISIBLE_DEVICES.
+        # 2. vLLM's communication layer (NCCL, CustomAllreduce) needs to see
+        #    all GPUs for P2P checks and communication setup. (If this were
+        #    the only reason, we could also have simply left the visible
+        #    devices unset.)
+        # Each worker then uses local_rank to index into the visible devices.
         all_args_to_update_environment_variables = [
             {
                 current_platform.device_control_env_var: ",".join(
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index f6e59526e..b451db382 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -209,6 +209,7 @@ class Worker(WorkerBase):
                 f"be less than or equal to the number of visible devices "
                 f"({visible_device_count})."
             )
+            self.device = torch.device(f"cuda:{self.local_rank}")
             current_platform.set_device(self.device)
diff --git a/vllm/v1/worker/worker_base.py b/vllm/v1/worker/worker_base.py
index d34eb5253..eed371e98 100644
--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import os
 from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, TypeVar
 
@@ -221,11 +220,6 @@ class WorkerWrapperBase:
         envs_list: list[dict[str, str]],
     ) -> None:
         envs = envs_list[self.rpc_rank]
-        key = "CUDA_VISIBLE_DEVICES"
-        if key in envs and key in os.environ:
-            # overwriting CUDA_VISIBLE_DEVICES is desired behavior
-            # suppress the warning in `update_environment_variables`
-            del os.environ[key]
         update_environment_variables(envs)
 
     def init_worker(self, all_kwargs: list[dict[str, Any]]) -> None:
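To make the executor change concrete, here is a hypothetical illustration of what _update_noset_device_env_vars does to the actor kwargs on a ROCm platform. The input kwargs below are made up for the example:

kwargs = {"num_cpus": 1, "runtime_env": {"env_vars": {"FOO": "bar"}}}
noset_vars = [
    "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES",
    "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES",
    "RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES",
]
# Same setdefault chain as the method above: create runtime_env/env_vars
# if absent, then merge in the NOSET flags without clobbering other vars.
env_vars = kwargs.setdefault("runtime_env", {}).setdefault("env_vars", {})
env_vars.update({v: "1" for v in noset_vars})
assert env_vars["FOO"] == "bar"  # pre-existing vars survive the merge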
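The worker side of the scheme, as a simplified sketch combining the driver and worker steps in one function. The function name and arguments are illustrative, not vLLM's actual code, which lives in ray_executor.py and gpu_worker.py:

import os

import torch

def bind_worker_device(local_rank: int, node_gpu_ids: list[int]) -> torch.device:
    # Driver side: expose ALL GPUs on the node to every worker.
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in node_gpu_ids)
    # Worker side: pick this worker's GPU by local_rank, mirroring
    # torch.device(f"cuda:{self.local_rank}") in the diff above.
    if local_rank >= len(node_gpu_ids):
        raise ValueError(
            f"local_rank ({local_rank}) must be less than the number of "
            f"visible devices ({len(node_gpu_ids)})."
        )
    device = torch.device(f"cuda:{local_rank}")
    torch.cuda.set_device(device)
    return device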