[NVIDIA] Add Cutlass MLA backend (#17625)

2025-06-04 12:40:26 +08:00
parent 8d646c2e53
commit 41aa578428
7 changed files with 111 additions and 3 deletions
--- a/tests/kernels/test_cutlass_mla_decode.py
+++ b/tests/kernels/test_cutlass_mla_decode.py
@@ -76,7 +76,9 @@ def test_cutlass_mla_decode(dtype: torch.dtype, mean_seq_len: int, bs: int,
    pack_factor = 128 // block_size
    block_num = ((block_num + pack_factor - 1) // pack_factor) * pack_factor

-    q = torch.randn(bs, h_q, d)
+    # Scale the query by 100 so the test also covers large-magnitude inputs,
+    # an edge case where CUTLASS kernel errors occur with split_k settings.
+    q = torch.randn(bs, h_q, d) * 100
    block_table = torch.randint(0,
                                bs * block_num, (bs, block_num),
                                dtype=torch.int32)