[CI Failure] Fix backend selection for encoder-only models (#28534)

Signed-off-by: Huamin Li <3ericli@gmail.com>
This commit is contained in:
Huamin Li
2025-11-13 07:11:27 -08:00
committed by GitHub
parent a7791eac9d
commit 07a606aa7e
14 changed files with 75 additions and 6 deletions

View File

@@ -76,6 +76,7 @@ def get_attn_backend(
use_mla: bool = False,
has_sink: bool = False,
use_sparse: bool = False,
attn_type: str | None = None,
) -> type[AttentionBackend]:
"""Selects which attention backend to use and lazily imports it."""
@@ -94,6 +95,7 @@ def get_attn_backend(
use_mla=use_mla,
has_sink=has_sink,
use_sparse=use_sparse,
attn_type=attn_type,
)
@@ -106,6 +108,7 @@ def _cached_get_attn_backend(
use_mla: bool = False,
has_sink: bool = False,
use_sparse: bool = False,
attn_type: str | None = None,
) -> type[AttentionBackend]:
# Check whether a particular choice of backend was
# previously forced.
@@ -159,6 +162,7 @@ def _cached_get_attn_backend(
use_mla,
has_sink,
use_sparse,
attn_type,
)
else:
attention_cls = current_platform.get_attn_backend_cls(
@@ -170,6 +174,7 @@ def _cached_get_attn_backend(
use_mla,
has_sink,
use_sparse,
attn_type,
)
if not attention_cls:
raise ValueError(