[CI Failure] Fix backend selection for encoder-only models (#28534)

Signed-off-by: Huamin Li <3ericli@gmail.com>
2025-11-13 07:11:27 -08:00
parent a7791eac9d
commit 07a606aa7e
14 changed files with 75 additions and 6 deletions
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -76,6 +76,7 @@ def get_attn_backend(
    use_mla: bool = False,
    has_sink: bool = False,
    use_sparse: bool = False,
+    attn_type: str | None = None,
 ) -> type[AttentionBackend]:
    """Selects which attention backend to use and lazily imports it."""

@@ -94,6 +95,7 @@ def get_attn_backend(
        use_mla=use_mla,
        has_sink=has_sink,
        use_sparse=use_sparse,
+        attn_type=attn_type,
    )


@@ -106,6 +108,7 @@ def _cached_get_attn_backend(
    use_mla: bool = False,
    has_sink: bool = False,
    use_sparse: bool = False,
+    attn_type: str | None = None,
 ) -> type[AttentionBackend]:
    # Check whether a particular choice of backend was
    # previously forced.
@@ -159,6 +162,7 @@ def _cached_get_attn_backend(
            use_mla,
            has_sink,
            use_sparse,
+            attn_type,
        )
    else:
        attention_cls = current_platform.get_attn_backend_cls(
@@ -170,6 +174,7 @@ def _cached_get_attn_backend(
            use_mla,
            has_sink,
            use_sparse,
+            attn_type,
        )
    if not attention_cls:
        raise ValueError(