[Bugfix] Improve GPU validation logging in Ray fallback scenarios (#25775)

Signed-off-by: Sairam Pillai <sairam.pillai61@gmail.com>
This commit is contained in:
Sairam Pillai
2025-10-30 17:27:59 +05:30
committed by GitHub
parent c01f6e525f
commit 74374386e2
2 changed files with 49 additions and 15 deletions

View File

@@ -521,15 +521,11 @@ class ParallelConfig:
current_platform.is_cuda()
and cuda_device_count_stateless() < self.world_size
):
if not ray_found:
raise ValueError(
"Unable to load Ray: "
f"{ray_utils.ray_import_err}. Ray is "
"required for multi-node inference, "
"please install Ray with `pip install "
"ray`."
)
backend = "ray"
gpu_count = cuda_device_count_stateless()
raise ValueError(
f"Tensor parallel size ({self.world_size}) cannot be "
f"larger than the number of available GPUs ({gpu_count})."
)
elif self.data_parallel_backend == "ray":
logger.info(
"Using ray distributed inference because "