[Minor] Enhance error message for TRTLLM decode uniformity check (#36609)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -1110,7 +1110,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
|
|||||||
if num_decodes > 0:
|
if num_decodes > 0:
|
||||||
if decode_use_trtllm:
|
if decode_use_trtllm:
|
||||||
assert num_decode_tokens % num_decodes == 0, (
|
assert num_decode_tokens % num_decodes == 0, (
|
||||||
"TRTLLM decode requires uniform query lengths per request."
|
"TRTLLM decode requires uniform query lengths per request. "
|
||||||
|
f"Got {num_decode_tokens=} and {num_decodes=}."
|
||||||
)
|
)
|
||||||
attn_metadata.decode = TRTLLMDecode(
|
attn_metadata.decode = TRTLLMDecode(
|
||||||
block_tables=block_table_tensor[:num_decodes],
|
block_tables=block_table_tensor[:num_decodes],
|
||||||
|
|||||||
Reference in New Issue
Block a user