[Platform] Do not raise error if _Backend is not found (#12023)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
from vllm.attention.backends.flash_attn import FlashAttentionBackend
|
||||
|
||||
|
||||
class DummyAttentionBackend(FlashAttentionBackend):
    """Test-only attention backend for the out-of-tree platform plugin.

    Inherits all behavior from ``FlashAttentionBackend`` and only overrides
    the reported name so tests can verify that this plugin backend was the
    one actually selected.
    """

    @staticmethod
    def get_name() -> str:
        """Return the identifier this backend registers under."""
        backend_id = "Dummy_Backend"
        return backend_id
@@ -3,3 +3,7 @@ from vllm.platforms.cuda import CudaPlatform
|
||||
|
||||
class DummyPlatform(CudaPlatform):
    """Test-only platform for exercising out-of-tree platform plugins.

    Derived from ``CudaPlatform``; overrides the device name and forces
    attention-backend resolution to the plugin's dummy backend class.
    """

    # Distinctive name so tests can confirm this platform was loaded.
    device_name = "DummyDevice"

    def get_attn_backend_cls(self, backend_name, head_size, dtype,
                             kv_cache_dtype, block_size, use_v1):
        """Return the fully-qualified path of the dummy attention backend.

        All selection inputs (backend_name, head_size, dtype, etc.) are
        ignored: this platform always routes to the plugin's backend.
        """
        backend_path = ("vllm_add_dummy_platform."
                        "dummy_attention_backend.DummyAttentionBackend")
        return backend_path
Reference in New Issue
Block a user