[V0 deprecation] Deprecate use_v1 parameter (#28112)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-11-12 22:03:52 +08:00
parent a9d18b5107
commit 10138c92a5
8 changed files with 31 additions and 35 deletions
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import inspect
 import os
 from collections.abc import Generator
 from contextlib import contextmanager
@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
    # get device-specific attn_backend
    from vllm.platforms import current_platform

-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    sig = inspect.signature(current_platform.get_attn_backend_cls)
+    if "use_v1" in sig.parameters:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
+            "remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,  # use_v1
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    else:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
    if not attention_cls:
        raise ValueError(
            f"Invalid attention backend for {current_platform.device_name}"