[BugFix] DPMetadata raises assert error for dense model (#32739)
Co-authored-by: Dezhan Tu <dztu@meta.com>
This commit is contained in:
@@ -203,7 +203,7 @@ class ForwardContext:
|
||||
attn_metadata: dict[str, AttentionMetadata] | list[dict[str, AttentionMetadata]]
|
||||
slot_mapping: dict[str, torch.Tensor] | list[dict[str, torch.Tensor]]
|
||||
"""
|
||||
Type Dict[str, AttentionMetadata] for v1, map from layer_name of each
|
||||
Type Dict[str, AttentionMetadata] for v1, map from layer_name of each
|
||||
attention layer to its attention metadata
|
||||
Type List[Dict[str, AttentionMetadata]] for DBO. List of size two, one
|
||||
for each microbatch.
|
||||
@@ -339,8 +339,10 @@ def set_forward_context(
|
||||
forward_start_time = time.perf_counter()
|
||||
|
||||
dp_metadata: DPMetadata | None = None
|
||||
if vllm_config.parallel_config.data_parallel_size > 1 and (
|
||||
attn_metadata is not None or num_tokens is not None
|
||||
if (
|
||||
vllm_config.parallel_config.data_parallel_size > 1
|
||||
and vllm_config.parallel_config.is_moe_model is not False
|
||||
and (attn_metadata is not None or num_tokens is not None)
|
||||
):
|
||||
# If num_tokens_across_dp hasn't already been initialized, then
|
||||
# initialize it here. Both DP padding and Microbatching will be
|
||||
|
||||
Reference in New Issue
Block a user