[Misc] Clean up cruft from previous FlashMLA sparse implementation (#26125)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Author: Lucas Wilkinson
Date: 2025-10-07 22:09:34 -04:00 (committed by GitHub)
parent 31a4b3e6c4
commit f80e7866c0
9 changed files with 80 additions and 141 deletions

@@ -4,19 +4,12 @@ import pytest
 import torch
 
 
-def _cuda_sm90_available() -> bool:
-    if not torch.cuda.is_available():
-        return False
-    major, _ = torch.cuda.get_device_capability()
-    return major == 9
-
-
 def test_sparse_flashmla_metadata_smoke():
     import vllm.attention.ops.flashmla as fm
 
-    ok, reason = fm.is_flashmla_supported()
-    if not ok or not _cuda_sm90_available():
-        pytest.skip(reason or "SM90 not available")
+    ok, reason = fm.is_flashmla_sparse_supported()
+    if not ok:
+        pytest.skip(reason)
 
     device = torch.device("cuda")
     batch_size = 1
@@ -43,9 +36,9 @@ def test_sparse_flashmla_metadata_smoke():
 def test_sparse_flashmla_decode_smoke():
     import vllm.attention.ops.flashmla as fm
 
-    ok, reason = fm.is_flashmla_supported()
-    if not ok or not _cuda_sm90_available():
-        pytest.skip(reason or "SM90 not available")
+    ok, reason = fm.is_flashmla_sparse_supported()
+    if not ok:
+        pytest.skip(reason)
 
     device = torch.device("cuda")
     batch_size = 1
@@ -106,9 +99,9 @@ def test_sparse_flashmla_decode_smoke():
 def test_sparse_flashmla_prefill_smoke():
     import vllm.attention.ops.flashmla as fm
 
-    ok, reason = fm.is_flashmla_supported()
-    if not ok or not _cuda_sm90_available():
-        pytest.skip(reason or "SM90 not available")
+    ok, reason = fm.is_flashmla_sparse_supported()
+    if not ok:
+        pytest.skip(reason)
 
     device = torch.device("cuda")
     s_q = 1
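
For reference, a minimal sketch of what a consolidated support check of this shape could look like. This is hypothetical, not the actual vLLM implementation of is_flashmla_sparse_supported(); only the (ok, reason) contract and the SM90 gate (taken from the removed _cuda_sm90_available helper) are grounded in the diff above.

    # Hypothetical sketch; the real vLLM helper may differ.
    from typing import Optional

    import torch


    def is_flashmla_sparse_supported_sketch() -> tuple[bool, Optional[str]]:
        # Fold the device probe into the support check so callers need
        # only a single gate, as in the updated tests above.
        if not torch.cuda.is_available():
            return False, "CUDA is not available"
        major, _ = torch.cuda.get_device_capability()
        if major != 9:
            return False, "FlashMLA sparse kernels require SM90 (Hopper)"
        return True, None


    # Usage mirrors the post-cleanup tests: one check, one skip reason.
    # ok, reason = is_flashmla_sparse_supported_sketch()
    # if not ok:
    #     pytest.skip(reason)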