[CI Failure] Fix backend selection for encoder-only models (#28534)
Signed-off-by: Huamin Li <3ericli@gmail.com>
This commit is contained in:
@@ -76,6 +76,7 @@ def get_attn_backend(
     use_mla: bool = False,
     has_sink: bool = False,
     use_sparse: bool = False,
+    attn_type: str | None = None,
 ) -> type[AttentionBackend]:
     """Selects which attention backend to use and lazily imports it."""
 
@@ -94,6 +95,7 @@ def get_attn_backend(
         use_mla=use_mla,
         has_sink=has_sink,
         use_sparse=use_sparse,
+        attn_type=attn_type,
     )
 
 
@@ -106,6 +108,7 @@ def _cached_get_attn_backend(
     use_mla: bool = False,
     has_sink: bool = False,
     use_sparse: bool = False,
+    attn_type: str | None = None,
 ) -> type[AttentionBackend]:
     # Check whether a particular choice of backend was
     # previously forced.
@@ -159,6 +162,7 @@ def _cached_get_attn_backend(
             use_mla,
             has_sink,
             use_sparse,
+            attn_type,
         )
     else:
         attention_cls = current_platform.get_attn_backend_cls(
@@ -170,6 +174,7 @@ def _cached_get_attn_backend(
             use_mla,
             has_sink,
             use_sparse,
+            attn_type,
         )
     if not attention_cls:
         raise ValueError(
Reference in New Issue
Block a user