[Core] Always use tensor cores for Flashinfer Decode Wrapper (#23214)

Signed-off-by: Pavani Majety <pmajety@nvidia.com>
2025-08-21 13:02:11 -07:00
parent 3496274663
commit 1d353b6352
5 changed files with 31 additions and 64 deletions
--- a/tests/kernels/attention/test_flashinfer_trtllm_attention.py
+++ b/tests/kernels/attention/test_flashinfer_trtllm_attention.py
@@ -136,9 +136,7 @@ def test_flashinfer_trtllm_decode_with_baseline(

    # Baseline Decode
    wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
-        workspace_buffer,
-        kv_layout,
-        use_tensor_cores=((num_qo_heads // num_kv_heads) > 4))
+        workspace_buffer, kv_layout, use_tensor_cores=True)
    wrapper.plan(kv_indptr,
                 kv_indices,
                 kv_last_page_lens,