diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 2e73c56b3..66a5ee67e 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -661,7 +661,18 @@ class NvmlCudaPlatform(CudaPlatformBase):
         handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
 
         try:
-            return pynvml.nvmlDeviceGetNumaNodeId(handle)
+            numa_node = pynvml.nvmlDeviceGetNumaNodeId(handle)
+            if cls._numa_node_has_cpus(numa_node):
+                return numa_node
+            # On non-CDMM Grace-Blackwell systems (e.g. GB200), each GPU's HBM
+            # is a separate NUMA node with no CPUs.  Fall through to
+            # CPU-affinity-based detection to find the nearest CPU node.
+            logger.debug(
+                "NUMA node %d for GPU %d has no CPUs (non-CDMM topology), "
+                "falling back to CPU-affinity-based detection",
+                numa_node,
+                device_id,
+            )
         except Exception:
             pass
 
@@ -681,6 +692,17 @@ class NvmlCudaPlatform(CudaPlatformBase):
 
         return None
 
+    @classmethod
+    def _numa_node_has_cpus(cls, node_id: int) -> bool:
+        """Return True if sysfs lists any CPU for NUMA node ``node_id``."""
+        from pathlib import Path
+
+        try:
+            cpus = Path(f"/sys/devices/system/node/node{node_id}/cpulist").read_text()
+        except (OSError, ValueError):
+            return False  # an unreadable entry is treated as "no CPUs"
+        return bool(cpus.strip())
+
     @classmethod
     def _get_device_cpu_affinity(cls, handle) -> list[int]:
         """Get the list of CPU IDs associated with a GPU via NVML."""