[V0 deprecation] Remove V0 CPU/XPU/TPU backends (#20412)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Signed-off-by: jiang1.li <jiang1.li@intel.com> Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
@@ -36,7 +36,8 @@ DEVICE_REGULAR_ATTN_BACKENDS = {
|
||||
DEVICE_MLA_BLOCK_SIZES = {
|
||||
"cuda": [16, 64], # CUDA supports both standard and extended block sizes
|
||||
"hip": [16, 1], # HIP requires special handling for block_size=1
|
||||
"cpu": [16] # CPU uses fixed block size from test cases
|
||||
# "cpu": [16] # CPU uses fixed block size from test cases
|
||||
"cpu": [] # FIXME(woosuk): Temporarily disable CPU tests
|
||||
}
|
||||
|
||||
|
||||
@@ -81,14 +82,14 @@ def test_env(
|
||||
m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
|
||||
|
||||
if device == "cpu":
|
||||
if not use_v1:
|
||||
pytest.skip("CPU backend only supports V1")
|
||||
|
||||
with patch("vllm.attention.selector.current_platform",
|
||||
CpuPlatform()):
|
||||
backend = get_attn_backend(16, torch.float16, torch.float16,
|
||||
block_size, False)
|
||||
if use_v1:
|
||||
assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
|
||||
else:
|
||||
assert backend.get_name() == "TORCH_SDPA"
|
||||
assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
|
||||
|
||||
elif device == "hip":
|
||||
with patch("vllm.attention.selector.current_platform",
|
||||
@@ -193,12 +194,14 @@ def test_fp32_fallback(
|
||||
m.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
|
||||
|
||||
if device == "cpu":
|
||||
if not use_v1:
|
||||
pytest.skip("CPU backend only supports V1")
|
||||
|
||||
with patch("vllm.attention.selector.current_platform",
|
||||
CpuPlatform()):
|
||||
backend = get_attn_backend(16, torch.float32, torch.float32,
|
||||
16, False)
|
||||
assert (backend.get_name() == "TORCH_SDPA_VLLM_V1"
|
||||
if use_v1 else "TORCH_SDPA")
|
||||
assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
|
||||
|
||||
elif device == "cuda":
|
||||
with patch("vllm.attention.selector.current_platform",
|
||||
|
||||
Reference in New Issue
Block a user