From be263f76451ad8a32baf0b935d3f0432d05300e6 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Sat, 15 Nov 2025 17:35:06 -0500 Subject: [PATCH] [BugFix] Fix `AssertionError: DCP not support reorder_batch_threshold > 1 now.` (#28751) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_model_runner.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 6590ca54a..ffbac5fe1 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -630,16 +630,6 @@ class GPUModelRunner( return if self.reorder_batch_threshold is not None: - # NOTE(lucas): currently no backend supports the custom masking - # required for DCP with q_len > 1, so we assert here. Remove this - # assert once the custom mask is support is added to FA3. - if ( - self.dcp_world_size > 1 - and envs.VLLM_ATTENTION_BACKEND != "FLASH_ATTN_MLA" - ): - assert self.reorder_batch_threshold == 1, ( - "DCP not support reorder_batch_threshold > 1 now." - ) reorder_batch_to_split_decodes_and_prefills( self.input_batch, scheduler_output,