[MM][Core] Decouple ViT backend from LM backend (#27061)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang
2025-10-21 00:30:10 -07:00
committed by GitHub
parent 72f431e709
commit c3a2c6ac5f
16 changed files with 230 additions and 17 deletions

View File

@@ -78,10 +78,18 @@ def get_vision_encoder_info(hf_config: VisionLanguageConfig) -> VisionEncoderInf
raise NotImplementedError(msg)
def get_vit_attn_backend(head_size: int, dtype: torch.dtype) -> _Backend:
def get_vit_attn_backend(
head_size: int,
dtype: torch.dtype,
*,
attn_backend_override: _Backend | None = None,
) -> _Backend:
"""
Get the available attention backend for Vision Transformer.
"""
if attn_backend_override is not None:
return attn_backend_override
# Lazy import to avoid circular dependency
from vllm.attention.selector import get_env_variable_attn_backend