[Bugfix][Kernel] Fix CUDA 11.8 being broken by FA3 build (#12375)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
@@ -4,8 +4,10 @@ import pytest
|
||||
import torch
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
-from vllm.vllm_flash_attn import (flash_attn_varlen_func,
-                                  flash_attn_with_kvcache)
+from vllm.vllm_flash_attn import (fa_version_unsupported_reason,
+                                  flash_attn_varlen_func,
+                                  flash_attn_with_kvcache,
+                                  is_fa_version_supported)
|
||||
|
||||
NUM_HEADS = [(4, 4), (8, 2), (16, 2)]
|
||||
HEAD_SIZES = [128, 256]
|
||||
@@ -95,10 +97,9 @@ def test_flash_attn_with_paged_kv(
|
||||
fa_version: int,
|
||||
) -> None:
|
||||
torch.set_default_device("cuda")
|
||||
-    if fa_version == 3 and (torch.cuda.get_device_capability() == (8, 6)
-                            or torch.cuda.get_device_capability() == (8, 9)):
-        pytest.skip("Flash attention version 3 fails on 8.6 and 8.9 due to "
-                    "insufficient shared memory for some shapes")
+    if not is_fa_version_supported(fa_version):
+        pytest.skip(f"Flash attention version {fa_version} not supported due "
+                    f"to: \"{fa_version_unsupported_reason(fa_version)}\"")
|
||||
|
||||
current_platform.seed_everything(0)
|
||||
num_seqs = len(kv_lens)
|
||||
@@ -182,11 +183,9 @@ def test_varlen_with_paged_kv(
|
||||
fa_version: int,
|
||||
) -> None:
|
||||
torch.set_default_device("cuda")
|
||||
-    if fa_version == 3 and (torch.cuda.get_device_capability() == (8, 6)
-                            or torch.cuda.get_device_capability() == (8, 9)):
-        pytest.skip("Flash attention version 3 fails on 8.6 and 8.9 due to "
-                    "insufficient shared memory for some shapes")
-
+    if not is_fa_version_supported(fa_version):
+        pytest.skip(f"Flash attention version {fa_version} not supported due "
+                    f"to: \"{fa_version_unsupported_reason(fa_version)}\"")
|
||||
current_platform.seed_everything(0)
|
||||
num_seqs = len(seq_lens)
|
||||
query_lens = [x[0] for x in seq_lens]
|
||||
|
||||
Reference in New Issue
Block a user