[Refactor]Abstract Platform Interface for Distributed Backend and Add xccl Support for Intel XPU (#19410)

Signed-off-by: dbyoung18 <yang5.yang@intel.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
2025-07-07 12:32:32 +08:00
parent 47db8c2c15
commit 6e2c19ce22
17 changed files with 44 additions and 8 deletions
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -7,7 +7,7 @@ from itertools import chain
 from typing import TYPE_CHECKING, Optional

 from vllm.plugins import load_plugins_by_group
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import resolve_obj_by_qualname, supports_xccl

 from .interface import _Backend  # noqa: F401
 from .interface import CpuArchEnum, Platform, PlatformEnum
@@ -139,10 +139,19 @@ def xpu_platform_plugin() -> Optional[str]:
    try:
        # installed IPEX if the machine has XPUs.
        import intel_extension_for_pytorch  # noqa: F401
-        import oneccl_bindings_for_pytorch  # noqa: F401
        import torch
+        if supports_xccl():
+            dist_backend = "xccl"
+        else:
+            dist_backend = "ccl"
+            import oneccl_bindings_for_pytorch  # noqa: F401
+
        if hasattr(torch, 'xpu') and torch.xpu.is_available():
            is_xpu = True
+            from vllm.platforms.xpu import XPUPlatform
+            XPUPlatform.dist_backend = dist_backend
+            logger.debug("Confirmed %s backend is available.",
+                         XPUPlatform.dist_backend)
            logger.debug("Confirmed XPU platform is available.")
    except Exception as e:
        logger.debug("XPU platform is not available because: %s", str(e))