[feature] extend DBO to XBO (#30120)
Signed-off-by: jiangkuaixue123 <jiangxiaozhou111@163.com> Co-authored-by: root <root@hk01dgx028.cm.cluster>
This commit is contained in:
@@ -156,6 +156,8 @@ class ParallelConfig:
|
||||
|
||||
enable_dbo: bool = False
|
||||
"""Enable dual batch overlap for the model executor."""
|
||||
ubatch_size: int = 0
|
||||
"""Number of ubatches (micro-batches) to use; a value greater than 1 enables ubatching."""
|
||||
|
||||
dbo_decode_token_threshold: int = 32
|
||||
"""The threshold for dual batch overlap for batches only containing decodes.
|
||||
@@ -325,6 +327,14 @@ class ParallelConfig:
|
||||
including data parallelism."""
|
||||
return self.world_size * self.data_parallel_size
|
||||
|
||||
# Report whether micro-batching (ubatching) is active for this config:
# true when enable_dbo is set, or when ubatch_size is greater than 1.
@property
|
||||
def use_ubatching(self) -> bool:
|
||||
return self.enable_dbo or self.ubatch_size > 1
|
||||
|
||||
# Number of ubatches to use: fixed at 2 when enable_dbo is set,
# otherwise whatever ubatch_size is configured to.
# NOTE(review): with enable_dbo off and ubatch_size left at its default of 0
# this returns 0 — presumably callers check use_ubatching first; confirm.
@property
|
||||
def num_ubatches(self) -> int:
|
||||
return 2 if self.enable_dbo else self.ubatch_size
|
||||
|
||||
def get_next_dp_init_port(self) -> int:
|
||||
"""
|
||||
We might need to initialize process groups in multiple
|
||||
|
||||
@@ -870,9 +870,12 @@ class VllmConfig:
|
||||
f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
|
||||
)
|
||||
|
||||
if self.parallel_config.enable_dbo:
|
||||
if self.parallel_config.use_ubatching:
|
||||
a2a_backend = self.parallel_config.all2all_backend
|
||||
assert a2a_backend in ["deepep_low_latency", "deepep_high_throughput"], (
|
||||
assert a2a_backend in [
|
||||
"deepep_low_latency",
|
||||
"deepep_high_throughput",
|
||||
], (
|
||||
"Microbatching currently only supports the deepep_low_latency and "
|
||||
f"deepep_high_throughput all2all backend. {a2a_backend} is not "
|
||||
"supported. To fix use --all2all-backend=deepep_low_latency or "
|
||||
|
||||
Reference in New Issue
Block a user