[Attention][Bugfix] Fix FA sink support (#28660)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni
2025-11-13 12:20:01 -06:00
committed by GitHub
parent 119c4927b3
commit f9f3b596f3

View File

@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend):
return flash_attn_supports_fp8()
return kv_cache_dtype in ["auto"]
@classmethod
def supports_sink(cls) -> bool:
    """Report whether this backend supports attention sinks.

    Sink support requires the varlen flash-attention entry point to be
    available at all; if it is, defer to the installed flash-attn's own
    capability probe.
    """
    return (
        is_flash_attn_varlen_func_available()
        and flash_attn_supports_sinks()
    )
@classmethod
def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
    """Return True when the device's compute capability is SM 8.0 or newer."""
    # Minimum capability this backend accepts.
    minimum_required = DeviceCapability(8, 0)
    return capability >= minimum_required