[Platform] Do not raise error if _Backend is not found (#12023)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
wangxiyuan
2025-01-15 18:14:15 +08:00
committed by GitHub
parent ad388d25a8
commit 3adf0ffda8
6 changed files with 49 additions and 16 deletions

View File

@@ -0,0 +1,8 @@
from vllm.attention.backends.flash_attn import FlashAttentionBackend
class DummyAttentionBackend(FlashAttentionBackend):
    """Attention backend that subclasses FlashAttentionBackend.

    The only member overridden here is the backend name.
    """

    @staticmethod
    def get_name() -> str:
        """Return the name string identifying this backend."""
        backend_name: str = "Dummy_Backend"
        return backend_name

View File

@@ -3,3 +3,7 @@ from vllm.platforms.cuda import CudaPlatform
class DummyPlatform(CudaPlatform):
    """Platform that subclasses CudaPlatform under the name "DummyDevice"."""

    device_name = "DummyDevice"

    def get_attn_backend_cls(
        self,
        backend_name,
        head_size,
        dtype,
        kv_cache_dtype,
        block_size,
        use_v1,
    ):
        """Return the dotted import path of the dummy attention backend.

        None of the arguments are consulted; the same path string is
        returned for every call.
        """
        # The path is returned as a string rather than as the class object.
        backend_cls_path = "vllm_add_dummy_platform.dummy_attention_backend.DummyAttentionBackend"  # noqa E501
        return backend_cls_path