Guard symmetric-memory buffer setup in SymmMemCommunicator with a
try/except RuntimeError (warn once, then return early), and switch the
default of VLLM_ALLREDUCE_USE_SYMM_MEM from "0" to "1" in vllm/envs.py.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks follow the unified-diff markers and match the hunk counts;
leading indentation of the Python lines is inferred from their nesting
(4-space steps assumed). Confirm against the tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/vllm/distributed/device_communicators/symm_mem.py b/vllm/distributed/device_communicators/symm_mem.py
index 74d6fb40c..eb1f173b1 100644
--- a/vllm/distributed/device_communicators/symm_mem.py
+++ b/vllm/distributed/device_communicators/symm_mem.py
@@ -88,13 +88,21 @@ class SymmMemCommunicator:
         self.max_size = SYMM_MEM_ALL_REDUCE_MAX_SIZES[self.device_capability][
             self.world_size
         ]
-
-        self.buffer = torch_symm_mem.empty(
-            self.max_size // self.dtype.itemsize,
-            device=self.device,
-            dtype=self.dtype,
-        )
-        handle = torch_symm_mem.rendezvous(self.buffer, self.group.group_name)
+        try:
+            self.buffer = torch_symm_mem.empty(
+                self.max_size // self.dtype.itemsize,
+                device=self.device,
+                dtype=self.dtype,
+            )
+            handle = torch_symm_mem.rendezvous(self.buffer, self.group.group_name)
+        except RuntimeError as e:
+            logger.warning_once(
+                "SymmMemCommunicator: symmetric memory initialization failed: %s "
+                "Communicator is not available. To suppress this warning set "
+                "VLLM_ALLREDUCE_USE_SYMM_MEM=0",
+                str(e),
+            )
+            return
         if handle.multicast_ptr == 0:
             logger.warning(
                 "SymmMemCommunicator: symmetric memory "
diff --git a/vllm/envs.py b/vllm/envs.py
index 5274c8ba1..46725efac 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -201,7 +201,7 @@ if TYPE_CHECKING:
     VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
     VLLM_ROCM_FP8_MFMA_PAGE_ATTN: bool = False
     VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False
-    VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False
+    VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True
     VLLM_TUNED_CONFIG_FOLDER: str | None = None
     VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS: set[str] = set()
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
@@ -1389,7 +1389,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     ),
     # Whether to use pytorch symmetric memory for allreduce
     "VLLM_ALLREDUCE_USE_SYMM_MEM": lambda: bool(
-        int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "0"))
+        int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "1"))
     ),
     # Allows vllm to find tuned config under customized folder
     "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),