[Kernel][CPU] CPU MLA (#14744)
Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
This commit is contained in:
@@ -37,6 +37,9 @@ class CpuPlatform(Platform):
|
||||
use_mla: bool) -> str:
|
||||
if selected_backend and selected_backend != _Backend.TORCH_SDPA:
|
||||
logger.info("Cannot use %s backend on CPU.", selected_backend)
|
||||
if use_mla:
|
||||
logger.info("Using CPU MLA backend.")
|
||||
return "vllm.attention.backends.cpu_mla.CPUMLABackend"
|
||||
logger.info("Using Torch SDPA backend.")
|
||||
return "vllm.attention.backends.torch_sdpa.TorchSDPABackend"
|
||||
|
||||
@@ -129,9 +132,6 @@ class CpuPlatform(Platform):
|
||||
# Disable torch async compiling which won't work with daemonic processes
|
||||
os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
|
||||
|
||||
# MLA attention is not supported
|
||||
os.environ["VLLM_MLA_DISABLE"] = "1"
|
||||
|
||||
# Intel OpenMP setting
|
||||
ld_prealod_str = os.getenv("LD_PRELOAD", "")
|
||||
if "libiomp5.so" in ld_prealod_str:
|
||||
|
||||
Reference in New Issue
Block a user