[CPU Backend] [Perf] Accelerate tensor-parallel/data-parallel inference across NUMA domains on Arm (#32792)

Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
Fadi Arafeh
2026-01-22 18:55:23 +00:00
committed by GitHub
parent 300622e609
commit 744ef30484
6 changed files with 164 additions and 6 deletions

View File

@@ -379,6 +379,12 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
endif()
endif()
# Add csrc/cpu/shm.cpp to VLLM_EXT_SRC when ASIMD_FOUND is true and
# APPLE_SILICON_FOUND is not. Both flags are computed outside this block.
# NOTE(review): "shm" presumably stands for shared memory -- confirm against
# the source file itself.
if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
  # Prepend: the new file goes ahead of whatever sources are already listed.
  set(VLLM_EXT_SRC "csrc/cpu/shm.cpp" ${VLLM_EXT_SRC})
endif()
if(USE_ONEDNN)
set(VLLM_EXT_SRC
"csrc/cpu/dnnl_kernels.cpp"