[BugFix][V1][ROCm] Triton MLA uses V0 backend on V1 engine (#19067)
Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com>
This commit is contained in:
@@ -186,8 +186,14 @@ class RocmPlatform(Platform):
|
||||
|
||||
if selected_backend == _Backend.TRITON_MLA:
|
||||
if block_size != 1:
|
||||
logger.info("Using Triton MLA backend.")
|
||||
return "vllm.attention.backends.triton_mla.TritonMLABackend" # noqa: E501
|
||||
if use_v1:
|
||||
logger.info_once(
|
||||
"Using Triton MLA backend on V1 engine.")
|
||||
return ("vllm.v1.attention.backends.mla."
|
||||
"triton_mla.TritonMLABackend")
|
||||
else:
|
||||
logger.info("Using Triton MLA backend.")
|
||||
return "vllm.attention.backends.triton_mla.TritonMLABackend" # noqa: E501
|
||||
else:
|
||||
raise ValueError(
|
||||
f" The selected backend, {selected_backend.name},"
|
||||
|
||||
Reference in New Issue
Block a user