[Minor] Enhance error message for TRTLLM decode uniformity check (#36609)

Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
Woosuk Kwon
2026-03-10 15:38:45 -07:00
committed by GitHub
parent 8d983d7cd6
commit 195d1ca3e8

View File

@@ -1111,6 +1111,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
if decode_use_trtllm:
assert num_decode_tokens % num_decodes == 0, (
"TRTLLM decode requires uniform query lengths per request. "
f"Got {num_decode_tokens=} and {num_decodes=}."
)
attn_metadata.decode = TRTLLMDecode(
block_tables=block_table_tensor[:num_decodes],