[Core] Always use tensor cores for Flashinfer Decode Wrapper (#23214)

Signed-off-by: Pavani Majety <pmajety@nvidia.com>
This commit is contained in:
Pavani Majety
2025-08-21 13:02:11 -07:00
committed by GitHub
parent 3496274663
commit 1d353b6352
5 changed files with 31 additions and 64 deletions

View File

@@ -136,9 +136,7 @@ def test_flashinfer_trtllm_decode_with_baseline(
# Baseline Decode
wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
-workspace_buffer,
-kv_layout,
-use_tensor_cores=((num_qo_heads // num_kv_heads) > 4))
+workspace_buffer, kv_layout, use_tensor_cores=True)
wrapper.plan(kv_indptr,
kv_indices,
kv_last_page_lens,