[Bugfix] Improve GPU validation logging in Ray fallback scenarios (#25775)

Signed-off-by: Sairam Pillai <sairam.pillai61@gmail.com>
2025-10-30 17:27:59 +05:30
parent c01f6e525f
commit 74374386e2
2 changed files with 49 additions and 15 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -521,15 +521,11 @@ class ParallelConfig:
                current_platform.is_cuda()
                and cuda_device_count_stateless() < self.world_size
            ):
-                if not ray_found:
-                    raise ValueError(
-                        "Unable to load Ray: "
-                        f"{ray_utils.ray_import_err}. Ray is "
-                        "required for multi-node inference, "
-                        "please install Ray with `pip install "
-                        "ray`."
-                    )
-                backend = "ray"
+                gpu_count = cuda_device_count_stateless()
+                raise ValueError(
+                    f"Tensor parallel size ({self.world_size}) cannot be "
+                    f"larger than the number of available GPUs ({gpu_count})."
+                )
            elif self.data_parallel_backend == "ray":
                logger.info(
                    "Using ray distributed inference because "