diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 1b030eaf1..42459815e 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -528,7 +528,6 @@ def split_decodes_and_prefills(
         # requests may have a query length of 0 but since they are padding its fine
         # to treat them as decodes (ensures num_decodes matches the captured size)
         if torch.all((query_lens == query_lens[0]) | (query_lens == 0)):
-            assert num_reqs * query_lens[0] == num_tokens, "tokens not padded correctly"
             return num_reqs, 0, num_tokens, 0  # all decodes
         is_prefill = query_lens != query_lens[0]
     else: