[V0 deprecation] Deprecate use_v1 parameter (#28112)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-11-12 22:03:52 +08:00
committed by GitHub
parent a9d18b5107
commit 10138c92a5
8 changed files with 31 additions and 35 deletions

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import inspect
import os
from collections.abc import Generator
from contextlib import contextmanager
@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
# get device-specific attn_backend
from vllm.platforms import current_platform
attention_cls = current_platform.get_attn_backend_cls(
selected_backend,
head_size,
dtype,
kv_cache_dtype,
block_size,
True,
use_mla,
has_sink,
use_sparse,
)
sig = inspect.signature(current_platform.get_attn_backend_cls)
if "use_v1" in sig.parameters:
logger.warning_once(
"use_v1 parameter for get_attn_backend_cls is deprecated and will "
"be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
"remove it from your plugin code."
)
attention_cls = current_platform.get_attn_backend_cls(
selected_backend,
head_size,
dtype,
kv_cache_dtype,
block_size,
True, # use_v1
use_mla,
has_sink,
use_sparse,
)
else:
attention_cls = current_platform.get_attn_backend_cls(
selected_backend,
head_size,
dtype,
kv_cache_dtype,
block_size,
use_mla,
has_sink,
use_sparse,
)
if not attention_cls:
raise ValueError(
f"Invalid attention backend for {current_platform.device_name}"