[Platform] Do not raise error if _Backend is not found (#12023)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
wangxiyuan
2025-01-15 18:14:15 +08:00
committed by GitHub
parent ad388d25a8
commit 3adf0ffda8
6 changed files with 49 additions and 16 deletions

View File

@@ -94,7 +94,12 @@ def test_flash_attn(monkeypatch):
def test_invalid_env(monkeypatch):
"""Throw an exception if the backend name is invalid."""
"""Ignore the invalid env variable if it is set."""
override_backend_env_variable(monkeypatch, STR_INVALID_VAL)
with pytest.raises(ValueError):
get_attn_backend(16, torch.float16, None, 16, False)
with patch("vllm.attention.selector.current_platform", CudaPlatform()):
backend = get_attn_backend(32, torch.float16, None, 16, False)
assert backend.get_name() == "FLASH_ATTN"
# when head size == 16, backend will fall back to XFORMERS
backend = get_attn_backend(16, torch.float16, None, 16, False)
assert backend.get_name() == "XFORMERS"