[feature] extend DBO to XBO (#30120)

Signed-off-by: jiangkuaixue123 <jiangxiaozhou111@163.com> Co-authored-by: root <root@hk01dgx028.cm.cluster>
2025-12-16 13:04:01 +08:00
parent c881db364e
commit b9ff4f2a8d
10 changed files with 133 additions and 73 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -156,6 +156,8 @@ class ParallelConfig:

    enable_dbo: bool = False
    """Enable dual batch overlap for the model executor."""
+    ubatch_size: int = 0
+    """Number of ubatches to use when DBO is off (>1 enables ubatching)."""

    dbo_decode_token_threshold: int = 32
    """The threshold for dual batch overlap for batches only containing decodes.
@@ -325,6 +327,14 @@ class ParallelConfig:
        including data parallelism."""
        return self.world_size * self.data_parallel_size

+    @property
+    def use_ubatching(self) -> bool:
+        return self.enable_dbo or self.ubatch_size > 1
+
+    @property
+    def num_ubatches(self) -> int:
+        return 2 if self.enable_dbo else self.ubatch_size
+
    def get_next_dp_init_port(self) -> int:
        """
        We might need to initialize process groups in multiple
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -870,9 +870,12 @@ class VllmConfig:
                    f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
                )

-        if self.parallel_config.enable_dbo:
+        if self.parallel_config.use_ubatching:
            a2a_backend = self.parallel_config.all2all_backend
-            assert a2a_backend in ["deepep_low_latency", "deepep_high_throughput"], (
+            assert a2a_backend in [
+                "deepep_low_latency",
+                "deepep_high_throughput",
+            ], (
                "Microbatching currently only supports the deepep_low_latency and "
                f"deepep_high_throughput all2all backend. {a2a_backend} is not "
                "supported. To fix use --all2all-backend=deepep_low_latency or "