[Bugfix][IPEX] Add VLLM_CPU_MOE_PREPACK to allow disabling MoE prepack when CPU does not support it (#14681)

Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
This commit is contained in:
Thien Tran
2025-03-14 11:43:18 +08:00
committed by GitHub
parent fb4c7f8ef0
commit 95d680b862
3 changed files with 10 additions and 1 deletions

View File

@@ -40,6 +40,7 @@ if TYPE_CHECKING:
VLLM_PP_LAYER_PARTITION: Optional[str] = None
VLLM_CPU_KVCACHE_SPACE: int = 0
VLLM_CPU_OMP_THREADS_BIND: str = ""
VLLM_CPU_MOE_PREPACK: bool = True
VLLM_OPENVINO_DEVICE: str = "CPU"
VLLM_OPENVINO_KVCACHE_SPACE: int = 0
VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: Optional[str] = None
@@ -349,6 +350,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_CPU_OMP_THREADS_BIND":
lambda: os.getenv("VLLM_CPU_OMP_THREADS_BIND", "all"),
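# (CPU backend only) Whether to use prepack for the MoE layer. This value is
# passed to ipex.llm.modules.GatedMLPMOE. It is parsed as an integer: "0"
# disables prepack and any non-zero integer enables it (default "1"). On
# unsupported CPUs, you might need to set this to "0" (False).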
"VLLM_CPU_MOE_PREPACK":
lambda: bool(int(os.getenv("VLLM_CPU_MOE_PREPACK", "1"))),
# OpenVINO device selection
# default is CPU
"VLLM_OPENVINO_DEVICE":