From 195d1ca3e8b1662e5df88b159a4306c48e1b0b5c Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Tue, 10 Mar 2026 15:38:45 -0700 Subject: [PATCH] [Minor] Enhance error message for TRTLLM decode uniformity check (#36609) Signed-off-by: Woosuk Kwon --- vllm/v1/attention/backends/flashinfer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 091a98952..844e8597e 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -1110,7 +1110,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]): if num_decodes > 0: if decode_use_trtllm: assert num_decode_tokens % num_decodes == 0, ( - "TRTLLM decode requires uniform query lengths per request." + "TRTLLM decode requires uniform query lengths per request. " + f"Got {num_decode_tokens=} and {num_decodes=}." ) attn_metadata.decode = TRTLLMDecode( block_tables=block_table_tensor[:num_decodes],