[Minor] Enhance error message for TRTLLM decode uniformity check (#36609)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -1111,6 +1111,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
if decode_use_trtllm:
    assert num_decode_tokens % num_decodes == 0, (
        "TRTLLM decode requires uniform query lengths per request. "
        f"Got {num_decode_tokens=} and {num_decodes=}."
    )
    attn_metadata.decode = TRTLLMDecode(
        block_tables=block_table_tensor[:num_decodes],
Reference in New Issue
Block a user