[Platform] Add output for Attention Backend (#11981)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2025-01-14 21:27:04 +08:00
Committed by: GitHub
parent 1f18adb245
commit 2e0e017610
4 changed files with 9 additions and 5 deletions


@@ -15,6 +15,8 @@ from vllm.vllm_flash_attn import flash_attn_varlen_func

 class FlashAttentionBackend(AttentionBackend):

+    accept_output_buffer: bool = True
+
     @staticmethod
     def get_supported_head_sizes() -> List[int]:
         return [32, 64, 96, 128, 160, 192, 224, 256]
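
The `accept_output_buffer` class attribute advertises that the backend can write attention results into a caller-provided output tensor instead of allocating its own. Below is a minimal sketch of how a caller might consume the flag; the function `run_attention` and its internals are hypothetical illustrations, not vLLM API.

    from typing import Optional

    import torch


    def run_attention(backend_cls, query: torch.Tensor) -> torch.Tensor:
        """Preallocate an output buffer only when the backend opts in."""
        output: Optional[torch.Tensor] = None
        # Backends that do not set the attribute are treated as opted out;
        # this commit sets it to True on FlashAttentionBackend.
        if getattr(backend_cls, "accept_output_buffer", False):
            # A caller-owned buffer lets the backend write results in place,
            # avoiding an extra allocation and copy on the hot path.
            output = torch.empty_like(query)
        # ... the backend's forward pass would fill `output` when provided,
        # or allocate and return its own tensor otherwise.
        return output if output is not None else torch.empty_like(query)

Keeping the flag as a plain class attribute means callers can check support without instantiating the backend, which fits how vLLM selects attention backends per platform.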