From ed733802f0446ba7170dbb921ec6689016d1fa3a Mon Sep 17 00:00:00 2001
From: Qidong Su
Date: Thu, 9 Apr 2026 03:36:51 -0400
Subject: [PATCH] Fix NUMA binding on non-CDMM Grace-Blackwell systems (#39361)

Signed-off-by: Qidong Su
Co-authored-by: Claude Opus 4.6 (1M context)
---
 vllm/platforms/cuda.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 2e73c56b3..66a5ee67e 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -661,7 +661,18 @@ class NvmlCudaPlatform(CudaPlatformBase):
         handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
 
         try:
-            return pynvml.nvmlDeviceGetNumaNodeId(handle)
+            numa_node = pynvml.nvmlDeviceGetNumaNodeId(handle)
+            if cls._numa_node_has_cpus(numa_node):
+                return numa_node
+            # On non-CDMM Grace-Blackwell systems (e.g. GB200), each GPU's HBM
+            # is a separate NUMA node with no CPUs. Fall through to
+            # CPU-affinity-based detection to find the nearest CPU node.
+            logger.debug(
+                "NUMA node %d for GPU %d has no CPUs (non-CDMM topology), "
+                "falling back to CPU-affinity-based detection",
+                numa_node,
+                device_id,
+            )
         except Exception:
             pass
 
@@ -681,6 +692,17 @@ class NvmlCudaPlatform(CudaPlatformBase):
 
         return None
 
+    @classmethod
+    def _numa_node_has_cpus(cls, node_id: int) -> bool:
+        """Check whether a NUMA node has any CPUs assigned to it."""
+        from pathlib import Path
+
+        cpulist_file = Path(f"/sys/devices/system/node/node{node_id}/cpulist")
+        try:
+            return cpulist_file.read_text().strip() != ""
+        except (OSError, ValueError):
+            return False
+
     @classmethod
     def _get_device_cpu_affinity(cls, handle) -> list[int]:
         """Get the list of CPU IDs associated with a GPU via NVML."""