[CPU Backend] [Perf] Accelerate tensor-parallel/data-parallel inference across NUMA domains on Arm (#32792)
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
@@ -29,7 +29,10 @@ class CpuCommunicator(DeviceCommunicatorBase):
         self.dist_module = torch.distributed
 
         if (
-            (current_platform.get_cpu_architecture() == CpuArchEnum.X86)
+            (
+                current_platform.get_cpu_architecture() == CpuArchEnum.X86
+                or current_platform.get_cpu_architecture() == CpuArchEnum.ARM
+            )
             and hasattr(torch.ops._C, "init_shm_manager")
             and (unique_name.startswith("tp") or unique_name.startswith("pp"))
         ):
Reference in New Issue
Block a user