Add tree attention backend for v1 (part 1) (#20401)

Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
This commit is contained in:
Giancarlo Delfin
2025-08-03 22:13:26 -07:00
committed by GitHub
parent c2e75b3c11
commit aa7012eb6d
12 changed files with 1098 additions and 25 deletions

View File

@@ -109,11 +109,11 @@ def create_common_attn_metadata(
def get_attention_backend(backend_name: _Backend):
"""Set up attention backend classes for testing.
Args:
backend_name: Name of the backend ("flash_attn", "flashinfer", etc.)
vllm_config: VllmConfig instance
Returns:
Tuple of (backend_builder_class, backend_impl_class)
"""
@@ -126,6 +126,8 @@ def get_attention_backend(backend_name: _Backend):
"vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",
_Backend.TRITON_ATTN_VLLM_V1:
"vllm.v1.attention.backends.triton_attn.TritonAttentionBackend",
_Backend.TREE_ATTN:
"vllm.v1.attention.backends.tree_attn.TreeAttentionBackend",
}
if backend_name not in backend_map: