[Feature] Add async tensor parallelism using compilation pass (#17882)

Signed-off-by: cascade812 <cascade812@outlook.com>
2025-05-23 01:03:34 -07:00
parent 4c611348a7
commit 71ea614d4a
11 changed files with 472 additions and 56 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3652,6 +3652,8 @@ class PassConfig:
    """Whether to enable the custom no-op elimination pass."""
    enable_sequence_parallelism: bool = False
    """Whether to enable sequence parallelism."""
+    enable_async_tp: bool = False
+    """Whether to enable async TP."""

    def uuid(self):
        """
@@ -3661,7 +3663,8 @@ class PassConfig:
        compilation.
        """
        include = {
-            "enable_fusion", "enable_noop", "enable_sequence_parallelism"
+            "enable_fusion", "enable_noop", "enable_sequence_parallelism",
+            "enable_async_tp"
        }
        dict_ = {k: v for k, v in asdict(self).items() if k in include}
        return InductorPass.hash_dict(dict_)
@@ -4274,6 +4277,12 @@ class VllmConfig:

        if self.compilation_config is None:
            self.compilation_config = CompilationConfig()
+
+        # async tp is built on top of sequence parallelism
+        # and requires it to be enabled.
+        if self.compilation_config.pass_config.enable_async_tp:
+            self.compilation_config.pass_config.enable_sequence_parallelism = \
+                True
        if self.compilation_config.pass_config.enable_sequence_parallelism:
            self.compilation_config.custom_ops.append("+rms_norm")
        if envs.VLLM_USE_V1 and self.model_config is not None and \