[HARDWARE][CPU] Add Option for Disabling Binding to Specific CPU Cores (#27953)

Signed-off-by: Stan Hatko <stan_hatko@live.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
2025-11-06 10:47:11 -05:00
parent 2176778cd3
commit e52e4da971
3 changed files with 15 additions and 8 deletions
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -14,6 +14,7 @@ from typing import TYPE_CHECKING
 import regex as re
 import torch

+from vllm import envs
 from vllm.logger import init_logger
 from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS

@@ -151,7 +152,6 @@ class CpuPlatform(Platform):

    @classmethod
    def get_device_total_memory(cls, device_id: int = 0) -> int:
-        import vllm.envs as envs
        from vllm.utils.mem_constants import GiB_bytes

        kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE
@@ -289,11 +289,16 @@ class CpuPlatform(Platform):
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

        # Note: to avoid the error 'nthreads cannot be larger than environment
-        #  variable "NUMEXPR_MAX_THREADS" (64)'.
+        # variable "NUMEXPR_MAX_THREADS" (64)'.
        os.environ["NUMEXPR_MAX_THREADS"] = str(get_max_threads())

-        # Set default threads num for OpenMP parallel
-        os.environ["OMP_NUM_THREADS"] = str(torch.get_num_threads())
+        if envs.VLLM_CPU_OMP_THREADS_BIND != "nobind":
+            # Set the default number of threads for OpenMP parallel regions
+            os.environ["OMP_NUM_THREADS"] = str(torch.get_num_threads())
+        else:
+            # In this case, setting the OpenMP configuration via
+            # OMP_NUM_THREADS is up to the user.
+            logger.info("Disabling binding processes to CPU cores...")

        # Disable torch async compiling which won't work with daemonic processes
        os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"