[ROCm][CI] Fix tool use test stability - disable skinny GEMM, prefix caching, eliminate batch variance (#35553)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-06 01:15:12 -06:00
committed by GitHub
parent 5afb387bd4
commit 807d680337
5 changed files with 33 additions and 17 deletions

View File

@@ -741,6 +741,14 @@ class AiterFlashAttentionBackend(AttentionBackend):
"fp8_e5m2",
]
@classmethod
def supports_attn_type(cls, attn_type: str) -> bool:
    """Tell whether this backend handles the given attention type.

    Only decoder attention and encoder-decoder (cross) attention are
    accepted; any other value yields ``False``.
    """
    accepted_types = (
        AttentionType.DECODER,
        AttentionType.ENCODER_DECODER,
    )
    return attn_type in accepted_types
@staticmethod
def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
return [16, 32]