Optimize Triton unified attention performance for sliding window attention (#24390)

Signed-off-by: zixi-qi <qizixi@meta.com>
Author: qizixi <qizixi@meta.com>
Date: 2025-09-19 12:07:26 -07:00
Committed by: GitHub
Parent commit: c59a0eca42
Commit: a2a5f79e09
2 changed files with 25 additions and 3 deletions


@@ -83,7 +83,7 @@ def ref_paged_attn(
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", HEAD_SIZES)
 @pytest.mark.parametrize("block_size", BLOCK_SIZES)
-@pytest.mark.parametrize("sliding_window", [None, 256])
+@pytest.mark.parametrize("sliding_window", [None, 64, 128, 256])
 @pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("soft_cap", [None, 50.0])
 @pytest.mark.parametrize("num_blocks", NUM_BLOCKS)