[Platform] Add output for Attention Backend (#11981)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2025-01-14 21:27:04 +08:00
Committed by: GitHub
parent 1f18adb245
commit 2e0e017610
4 changed files with 9 additions and 5 deletions


@@ -15,6 +15,8 @@ from vllm.vllm_flash_attn import flash_attn_varlen_func

 class FlashAttentionBackend(AttentionBackend):

+    accept_output_buffer: bool = True
+
     @staticmethod
     def get_supported_head_sizes() -> List[int]:
         return [32, 64, 96, 128, 160, 192, 224, 256]
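
The `accept_output_buffer` class attribute advertises that the backend can write attention results into a caller-provided output tensor instead of allocating its own. Below is a minimal sketch of how a caller might consume the flag; the function `run_attention` and its internals are hypothetical illustrations, not vLLM API.

    from typing import Optional

    import torch


    def run_attention(backend_cls, query: torch.Tensor) -> torch.Tensor:
        """Preallocate an output buffer only when the backend opts in."""
        output: Optional[torch.Tensor] = None
        # Backends that do not set the attribute are treated as opted out;
        # this commit sets it to True on FlashAttentionBackend.
        if getattr(backend_cls, "accept_output_buffer", False):
            # A caller-owned buffer lets the backend write results in place,
            # avoiding an extra allocation and copy on the hot path.
            output = torch.empty_like(query)
        # ... the backend's forward pass would fill `output` when provided,
        # or allocate and return its own tensor otherwise.
        return output if output is not None else torch.empty_like(query)

Keeping the flag as a plain class attribute means callers can check support without instantiating the backend, which fits how vLLM selects attention backends per platform.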