[Core] Always use tensor cores for FlashInfer Decode Wrapper (#23214)

Signed-off-by: Pavani Majety <pmajety@nvidia.com>
2025-08-21 13:02:11 -07:00
parent 3496274663
commit 1d353b6352
5 changed files with 31 additions and 64 deletions
--- a/benchmarks/kernels/benchmark_trtllm_decode_attention.py
+++ b/benchmarks/kernels/benchmark_trtllm_decode_attention.py
@@ -110,7 +110,7 @@ def benchmark_decode(
    wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
        workspace_buffer,
        kv_layout,
-        use_tensor_cores=((num_qo_heads // num_kv_heads) > 4),
+        use_tensor_cores=True,
    )
    wrapper.plan(
        kv_indptr,