[Perf] Disable clean_logits in deepgemm fp8_mqa_logits kernel (#33568)

This commit is contained in:
Xin Yang
2026-02-05 17:34:00 -08:00
committed by GitHub
parent 325ab6b0a8
commit 79028d4388
4 changed files with 61 additions and 27 deletions

View File

@@ -108,6 +108,7 @@ def sparse_attn_indexer(
weights[chunk.token_start : chunk.token_end],
chunk.cu_seqlen_ks,
chunk.cu_seqlen_ke,
clean_logits=False,
)
num_rows = logits.shape[0]
@@ -157,6 +158,7 @@ def sparse_attn_indexer(
decode_metadata.block_table,
decode_metadata.schedule_metadata,
max_model_len=max_model_len,
clean_logits=False,
)
num_rows = logits.shape[0]