[V0 deprecation] Remove V0 HPU backend (#21131)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-07-17 16:37:36 -07:00
parent ac9fb732a5
commit 4de7146351
27 changed files with 10 additions and 3926 deletions
--- a/vllm/distributed/device_communicators/hpu_communicator.py
+++ b/vllm/distributed/device_communicators/hpu_communicator.py
@@ -1,46 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import torch
-import torch.distributed as dist
-
-from vllm.platforms import current_platform
-
-from .base_device_communicator import DeviceCommunicatorBase
-
-if current_platform.is_hpu():
-    import habana_frameworks.torch as htorch  # noqa: F401
-
-
-class HpuCommunicator(DeviceCommunicatorBase):
-
-    def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
-        # FIXME(kzawora): this is a workaround for a bug in Habana PT bridge
-        # occurring when PT_HPU_ENABLE_LAZY_COLLECTIVES=true env var is used
-        # (which is required for tensor parallel HPUGraph inference)
-        htorch.core.mark_step()
-        dist.all_reduce(input_, group=self.device_group)
-        return input_
-
-    def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Tensor:
-        world_size = self.world_size
-        if dim < 0:
-            # Convert negative dim to positive.
-            dim += input_.dim()
-        input_size = input_.size()
-        # Allocate output tensor.
-        output_tensor = torch.empty((world_size, ) + input_size,
-                                    dtype=input_.dtype,
-                                    device=input_.device)
-        # All-gather.
-        htorch.core.mark_step()
-        dist.all_gather_into_tensor(output_tensor,
-                                    input_,
-                                    group=self.device_group)
-        # Reshape
-        output_tensor = output_tensor.movedim(0, dim)
-        output_tensor = output_tensor.reshape(input_size[:dim] +
-                                              (world_size *
-                                               input_size[dim], ) +
-                                              input_size[dim + 1:])
-        return output_tensor