[CPU] V1 support for the CPU backend (#16441)

This commit is contained in:
Li, Jiang
2025-06-04 09:43:01 +08:00
committed by GitHub
parent 52dceb172d
commit 4555143ea7
15 changed files with 465 additions and 40 deletions

View File

@@ -1399,6 +1399,7 @@ class EngineArgs:
"FLASHINFER",
"FLASHINFER_VLLM_V1",
"ROCM_AITER_MLA",
"TORCH_SDPA_VLLM_V1",
]
if (envs.is_set("VLLM_ATTENTION_BACKEND")
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
@@ -1431,7 +1432,8 @@ class EngineArgs:
# Non-[CUDA, TPU, CPU] may be supported on V1, but off by default for now.
v0_hardware = not any(
(current_platform.is_cuda(), current_platform.is_tpu()))
(current_platform.is_cuda(), current_platform.is_tpu(),
current_platform.is_cpu()))
if v0_hardware and _warn_or_fallback( # noqa: SIM103
current_platform.device_name):
return False