From f45870b53fc4cab2b747b04d5d3c47a68377d548 Mon Sep 17 00:00:00 2001 From: tianshu-Michael-yu <101950379+tianshu-Michael-yu@users.noreply.github.com> Date: Fri, 30 Jan 2026 00:23:14 -0800 Subject: [PATCH] fix: allow LFM2 MoE prefix caching (align) (#33376) Signed-off-by: Tianshu Yu --- vllm/model_executor/models/lfm2_moe.py | 8 +++++--- vllm/model_executor/models/lfm2_vl.py | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/lfm2_moe.py b/vllm/model_executor/models/lfm2_moe.py index 293471bba..22bd554bd 100644 --- a/vllm/model_executor/models/lfm2_moe.py +++ b/vllm/model_executor/models/lfm2_moe.py @@ -651,9 +651,11 @@ class Lfm2MoeForCausalLM( quant_config = vllm_config.quant_config cache_config = vllm_config.cache_config - assert not cache_config.enable_prefix_caching, ( - "Lfm2Moe currently does not support prefix caching" - ) + if cache_config.mamba_cache_mode == "all": + raise NotImplementedError( + "Lfm2Moe currently does not support 'all' prefix caching, " + "please use '--mamba-cache-mode=align' instead" + ) super().__init__() self.config = config diff --git a/vllm/model_executor/models/lfm2_vl.py b/vllm/model_executor/models/lfm2_vl.py index 09ac42182..532a2a913 100644 --- a/vllm/model_executor/models/lfm2_vl.py +++ b/vllm/model_executor/models/lfm2_vl.py @@ -22,6 +22,8 @@ from vllm.config import VllmConfig from vllm.config.multimodal import BaseDummyOptions from vllm.forward_context import set_forward_context from vllm.model_executor.layers.mamba.mamba_utils import ( + MambaStateCopyFunc, + MambaStateCopyFuncCalculator, MambaStateDtypeCalculator, MambaStateShapeCalculator, ) @@ -584,6 +586,10 @@ class Lfm2VLForConditionalGeneration( conv_kernel=hf_language_config.conv_L_cache, ) + @classmethod + def get_mamba_state_copy_func(cls) -> tuple[MambaStateCopyFunc]: + return MambaStateCopyFuncCalculator.short_conv_state_copy_func() + def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"): super().__init__() config: Lfm2VlConfig = vllm_config.model_config.hf_config