[CPU Backend] [Perf] Accelerate tensor-parallel/data-parallel inference across NUMA domains on Arm (#32792)
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
@@ -379,6 +379,12 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# When ASIMD_FOUND is set and APPLE_SILICON_FOUND is not, put
# "csrc/cpu/shm.cpp" at the front of the VLLM_EXT_SRC source list; any entries
# already in VLLM_EXT_SRC are kept after it.
# NOTE(review): both *_FOUND flags are defined outside this excerpt; presumably
# they come from earlier CPU feature detection -- confirm against the full file.
if (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
|
||||
set(VLLM_EXT_SRC
|
||||
"csrc/cpu/shm.cpp"
|
||||
${VLLM_EXT_SRC})
|
||||
endif()
|
||||
|
||||
if(USE_ONEDNN)
|
||||
set(VLLM_EXT_SRC
|
||||
"csrc/cpu/dnnl_kernels.cpp"
|
||||
|
||||
Reference in New Issue
Block a user