[CPU] Replace OMP initialization (#36487)

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
This commit is contained in:
Anton Ivanov
2026-04-03 11:42:43 +01:00
committed by GitHub
parent 25f2b55319
commit abebd9323d
7 changed files with 321 additions and 426 deletions

View File

@@ -8,8 +8,6 @@
// libraries use different ISAs.
#define TORCH_EXTENSION_NAME _C
std::string init_cpu_threads_env(const std::string& cpu_ids);
void release_dnnl_matmul_handler(int64_t handler);
int64_t create_onednn_scaled_mm_handler(const torch::Tensor& b,
@@ -354,7 +352,6 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"str act, str isa) -> ()");
ops.impl("cpu_fused_moe", torch::kCPU, &cpu_fused_moe);
#endif
ops.def("init_cpu_threads_env(str cpu_ids) -> str", &init_cpu_threads_env);
ops.def(
"mla_decode_kvcache("
" Tensor! out, Tensor query, Tensor kv_cache,"

View File

@@ -21,150 +21,6 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
#endif
#ifndef VLLM_NUMA_DISABLED
// Bind this process's OpenMP worker threads to the CPUs named in `cpu_ids`
// (a libnuma cpustring such as "0-3,8,9") and restrict memory allocation to
// the NUMA nodes hosting those CPUs.
//
// Returns a human-readable report of the resulting OMP-thread -> core
// mapping. Throws (via TORCH_CHECK) if the CPU string cannot be parsed or
// thread affinity cannot be applied.
std::string init_cpu_threads_env(const std::string& cpu_ids) {
  bitmask* omp_cpu_mask = numa_parse_cpustring_all(cpu_ids.c_str());
  TORCH_CHECK(omp_cpu_mask != nullptr,
              "Failed to parse CPU string: " + cpu_ids);
  TORCH_CHECK(omp_cpu_mask->size > 0);

  // Expand the bitmask into an explicit list of CPU ids by scanning it one
  // machine word at a time.
  std::vector<int> omp_cpu_ids;
  omp_cpu_ids.reserve(omp_cpu_mask->size);
  constexpr int group_size = 8 * sizeof(*omp_cpu_mask->maskp);
  for (int offset = 0; offset < static_cast<int>(omp_cpu_mask->size);
       offset += group_size) {
    unsigned long group_mask = omp_cpu_mask->maskp[offset / group_size];
    int i = 0;
    while (group_mask) {
      if (group_mask & 1) {
        omp_cpu_ids.emplace_back(offset + i);
      }
      ++i;
      group_mask >>= 1;
    }
  }

  // Memory node binding
  if (numa_available() != -1) {
    // Collect the set of NUMA nodes that host the selected CPUs.
    std::set<int> node_ids;
    for (const auto& cpu_id : omp_cpu_ids) {
      int node_id = numa_node_of_cpu(cpu_id);
      if (node_id != -1) {
        node_ids.insert(node_id);
      }
    }
    if (!node_ids.empty()) {
      // Concatenate all node_ids into a single comma-separated string
      std::string node_ids_str;
      for (const int node_id : node_ids) {
        if (!node_ids_str.empty()) {
          node_ids_str += ",";
        }
        node_ids_str += std::to_string(node_id);
      }

      bitmask* mask = numa_parse_nodestring(node_ids_str.c_str());
      bitmask* src_mask = numa_get_mems_allowed();
      int pid = getpid();
      if (mask && src_mask) {
        // Move all existing pages off the nodes we are NOT binding to:
        // src_mask must become "allowed nodes minus target nodes".
        // FIX: the previous code XOR-ed only the first word of the bitmask,
        // which ignored nodes >= 64 and could *add* nodes to src_mask when a
        // target node was absent from mems_allowed. Clear the target bits
        // across the whole mask instead.
        constexpr size_t bits_per_word = 8 * sizeof(*src_mask->maskp);
        const size_t n_bits =
            src_mask->size < mask->size ? src_mask->size : mask->size;
        const size_t n_words = (n_bits + bits_per_word - 1) / bits_per_word;
        for (size_t w = 0; w < n_words; ++w) {
          src_mask->maskp[w] &= ~mask->maskp[w];
        }
        int page_num = numa_migrate_pages(pid, src_mask, mask);
        if (page_num == -1) {
          TORCH_WARN("numa_migrate_pages failed. errno: " +
                     std::to_string(errno));
        }
        // Restrict memory allocation to the selected NUMA node(s).
        // Enhances memory locality for the threads bound to those NUMA CPUs.
        if (node_ids.size() > 1) {
          // Multiple nodes: interleave allocations across them.
          errno = 0;
          numa_set_interleave_mask(mask);
          if (errno != 0) {
            TORCH_WARN("numa_set_interleave_mask failed. errno: " +
                       std::to_string(errno));
          } else {
            TORCH_WARN(
                "NUMA binding: Using INTERLEAVE policy for memory "
                "allocation across multiple NUMA nodes (nodes: " +
                node_ids_str +
                "). Memory allocations will be "
                "interleaved across the specified NUMA nodes.");
          }
        } else {
          // Single node: bind allocations strictly to it.
          errno = 0;
          numa_set_membind(mask);
          if (errno != 0) {
            TORCH_WARN("numa_set_membind failed. errno: " +
                       std::to_string(errno));
          } else {
            TORCH_WARN(
                "NUMA binding: Using MEMBIND policy for memory "
                "allocation on the NUMA nodes (" +
                node_ids_str +
                "). Memory allocations will be "
                "strictly bound to these NUMA nodes.");
          }
        }
        // Fail allocations instead of silently falling back to other nodes.
        numa_set_strict(1);
        numa_free_nodemask(mask);
        numa_free_nodemask(src_mask);
      } else {
        // FIX: the original message named numa_get_run_node_mask, which is
        // not called here; report the functions actually used.
        TORCH_WARN(
            "numa_parse_nodestring or numa_get_mems_allowed failed. errno: " +
            std::to_string(errno));
        // FIX: free whichever mask WAS successfully allocated so a partial
        // failure does not leak the other bitmask.
        if (mask) {
          numa_free_nodemask(mask);
        }
        if (src_mask) {
          numa_free_nodemask(src_mask);
        }
      }
    }
  }

  // OMP threads binding: one OMP thread per selected CPU.
  omp_set_num_threads(static_cast<int>(omp_cpu_ids.size()));
  torch::set_num_threads(static_cast<int>(omp_cpu_ids.size()));
  TORCH_CHECK_EQ(omp_cpu_ids.size(), torch::get_num_threads());
  TORCH_CHECK_EQ(omp_cpu_ids.size(), omp_get_max_threads());

  std::vector<std::pair<int, int>> thread_core_mapping;
  thread_core_mapping.reserve(omp_cpu_ids.size());
  omp_lock_t writelock;
  omp_init_lock(&writelock);
// schedule(static, 1) hands out exactly one iteration (CPU) per OMP thread,
// so each worker pins itself to a distinct core.
#pragma omp parallel for schedule(static, 1)
  for (size_t i = 0; i < omp_cpu_ids.size(); ++i) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(omp_cpu_ids[i], &mask);
    int ret = sched_setaffinity(0, sizeof(cpu_set_t), &mask);
    if (ret == -1) {
      // NOTE(review): throwing inside an OpenMP parallel region terminates
      // the process; acceptable here since affinity failure is fatal anyway.
      TORCH_CHECK(false,
                  "sched_setaffinity failed. errno: " + std::to_string(errno));
    }
    // thread_core_mapping is shared across workers; serialize the append.
    omp_set_lock(&writelock);
    thread_core_mapping.emplace_back(gettid(), omp_cpu_ids[i]);
    omp_unset_lock(&writelock);
  }
  omp_destroy_lock(&writelock);
  numa_free_nodemask(omp_cpu_mask);

  // Build the report, sorted by core id for readability.
  std::stringstream ss;
  ss << "OMP threads binding of Process " << getpid() << ":\n";
  std::sort(thread_core_mapping.begin(), thread_core_mapping.end(),
            [](auto&& a, auto&& b) { return a.second < b.second; });
  for (auto&& item : thread_core_mapping) {
    ss << "\t"
       << "OMP tid: " << item.first << ", core " << item.second << "\n";
  }
  return ss.str();
}
#endif // VLLM_NUMA_DISABLED
namespace cpu_utils {
ScratchPadManager::ScratchPadManager() : size_(0), ptr_(nullptr) {
this->realloc(allocation_unit * 128);