Revert "[V0 deprecation] Remove V0 CPU/XPU/TPU backends (#20412)"
Some checks failed
Create Release / Create Release (push) Has been cancelled

This reverts commit e202dd2736.
This commit is contained in:
simon-mo
2025-07-06 14:02:36 -07:00
parent c18b3b8e8b
commit a5dd03c1eb
20 changed files with 5034 additions and 46 deletions

View File

@@ -36,8 +36,7 @@ DEVICE_REGULAR_ATTN_BACKENDS = {
DEVICE_MLA_BLOCK_SIZES = {
"cuda": [16, 64], # CUDA supports both standard and extended block sizes
"hip": [16, 1], # HIP requires special handling for block_size=1
- # "cpu": [16] # CPU uses fixed block size from test cases
- "cpu": [] # FIXME(woosuk): Temporarily disable CPU tests
+ "cpu": [16] # CPU uses fixed block size from test cases
}
@@ -82,14 +81,14 @@ def test_env(
m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
if device == "cpu":
- if not use_v1:
- pytest.skip("CPU backend only supports V1")
with patch("vllm.attention.selector.current_platform",
CpuPlatform()):
backend = get_attn_backend(16, torch.float16, torch.float16,
block_size, False)
- assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
+ if use_v1:
+ assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
+ else:
+ assert backend.get_name() == "TORCH_SDPA"
elif device == "hip":
with patch("vllm.attention.selector.current_platform",
@@ -205,14 +204,12 @@ def test_fp32_fallback(
m.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
if device == "cpu":
- if not use_v1:
- pytest.skip("CPU backend only supports V1")
with patch("vllm.attention.selector.current_platform",
CpuPlatform()):
backend = get_attn_backend(16, torch.float32, torch.float32,
16, False)
- assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
+ assert (backend.get_name() == "TORCH_SDPA_VLLM_V1"
+ if use_v1 else "TORCH_SDPA")
elif device == "cuda":
with patch("vllm.attention.selector.current_platform",