[CI Perf] Prune tests in tests/kernels/attention/ (#22936)
Signed-off-by: mgoin <mgoin64@gmail.com>
@@ -29,17 +29,14 @@ MAX_SEQ_LEN = get_max_shared_memory_bytes() // FLOAT32_BYTES - 512
 NUM_BLOCKS = 4321  # Arbitrary values for testing
 PARTITION_SIZE = 512
 PARTITION_SIZE_ROCM = 256
-# flshattF and tritonflashattF supported: {torch.float16, torch.bfloat16}
-DTYPES = [
-    torch.half, torch.bfloat16, torch.float
-] if not current_platform.is_rocm() else [torch.half, torch.bfloat16]
+DTYPES = [torch.bfloat16]
 NUM_GEN_SEQS = [7]  # Arbitrary values for testing
 NUM_PREFILL_SEQS = [3]  # Arbitrary values for testing
 NUM_HEADS = [(40, 40), (64, 8)]  # Arbitrary values for testing
 
 # This should be sync with get_supported_head_sizes() in
 # vllm.attention.ops.paged_attn.PagedAttention
-HEAD_SIZES = [32, 64, 80, 96, 112, 120, 128, 192, 256]
+HEAD_SIZES = [32, 80, 128, 256]
 
 BLOCK_SIZES = [16, 32]
 USE_ALIBI = [False, True]
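For context on why shrinking these lists cuts CI time: the attention tests build their case matrix by stacking `pytest.mark.parametrize` decorators over these module-level lists, so the generated test count is the Cartesian product of the list lengths. Below is a minimal sketch of that mechanism; the test body, decorator order, and the `(num_query_heads, num_kv_heads)` reading of NUM_HEADS are illustrative assumptions, not the real paged-attention test.

```python
# Minimal sketch, assuming the module parametrizes over the pruned lists
# the way tests/kernels/attention/test_attention.py does; the test body
# below is a hypothetical placeholder, not the real kernel check.
import pytest
import torch

DTYPES = [torch.bfloat16]        # pruned from [torch.half, torch.bfloat16, torch.float]
NUM_HEADS = [(40, 40), (64, 8)]  # assumed (num_query_heads, num_kv_heads)
HEAD_SIZES = [32, 80, 128, 256]  # pruned from [32, 64, 80, 96, 112, 120, 128, 192, 256]
BLOCK_SIZES = [16, 32]
USE_ALIBI = [False, True]


@pytest.mark.parametrize("num_heads", NUM_HEADS)
@pytest.mark.parametrize("head_size", HEAD_SIZES)
@pytest.mark.parametrize("use_alibi", USE_ALIBI)
@pytest.mark.parametrize("block_size", BLOCK_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
def test_paged_attention(num_heads, head_size, use_alibi, block_size, dtype):
    # Stacked parametrize decorators multiply: pytest generates one case per
    # element of the Cartesian product, here 2 * 4 * 2 * 2 * 1 = 32 cases.
    assert dtype in DTYPES and head_size in HEAD_SIZES
```

For the two axes this commit touches, the non-ROCm slice of the matrix shrinks from 3 dtypes × 9 head sizes = 27 combinations to 1 × 4 = 4, roughly a 6.75× reduction before the remaining axes multiply in.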