[feature] extend DBO to XBO (#30120)

Signed-off-by: jiangkuaixue123 <jiangxiaozhou111@163.com>
Co-authored-by: root <root@hk01dgx028.cm.cluster>
This commit is contained in:
jiangkuaixue123
2025-12-16 13:04:01 +08:00
committed by GitHub
parent c881db364e
commit b9ff4f2a8d
10 changed files with 133 additions and 73 deletions

View File

@@ -156,6 +156,8 @@ class ParallelConfig:
enable_dbo: bool = False
"""Enable dual batch overlap for the model executor."""
ubatch_size: int = 0
"""Number of ubatch size."""
dbo_decode_token_threshold: int = 32
"""The threshold for dual batch overlap for batches only containing decodes.
@@ -325,6 +327,14 @@ class ParallelConfig:
including data parallelism."""
return self.world_size * self.data_parallel_size
@property
def use_ubatching(self) -> bool:
    """Return True when micro-batching is in effect.

    Micro-batching is active either because dual batch overlap (DBO) is
    enabled or because an explicit ubatch size greater than one was set.
    """
    if self.enable_dbo:
        return True
    return self.ubatch_size > 1
@property
def num_ubatches(self) -> int:
    """Return the number of micro-batches to split execution into.

    DBO always uses exactly two micro-batches; otherwise the configured
    `ubatch_size` value is returned unchanged.
    NOTE(review): `ubatch_size` appears to double as the micro-batch
    count here — confirm against callers.
    """
    if self.enable_dbo:
        return 2
    return self.ubatch_size
def get_next_dp_init_port(self) -> int:
"""
We might need to initialize process groups in multiple

View File

@@ -870,9 +870,12 @@ class VllmConfig:
f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
)
if self.parallel_config.enable_dbo:
if self.parallel_config.use_ubatching:
a2a_backend = self.parallel_config.all2all_backend
assert a2a_backend in ["deepep_low_latency", "deepep_high_throughput"], (
assert a2a_backend in [
"deepep_low_latency",
"deepep_high_throughput",
], (
"Microbatching currently only supports the deepep_low_latency and "
f"deepep_high_throughput all2all backend. {a2a_backend} is not "
"supported. To fix use --all2all-backend=deepep_low_latency or "