[Hybrid] Added supports_mamba_prefix_caching Protocol (#27339)

Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
2025-10-27 15:05:20 +02:00
parent f4e8154076
commit 9273754222
10 changed files with 93 additions and 20 deletions
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -295,17 +295,8 @@ class MambaModelConfig(VerifyAndUpdateConfig):
        # override by prefix caching logic later)
        cache_config.mamba_block_size = model_config.max_model_len

-        # TODO(@tdoublep) find a better way to do this than whitelist
-        MAMBA2_MODELS = [
-            "BambaForCausalLM",
-            "FalconH1ForCausalLM",
-            "GraniteMoeHybridForCausalLM",
-            "Mamba2ForCausalLM",
-            "NemotronHForCausalLM",
-            "Zamba2ForCausalLM",
-        ]
        if cache_config.enable_prefix_caching:
-            if model_config.architecture in MAMBA2_MODELS:
+            if model_config.supports_mamba_prefix_caching:
                logger.info(
                    "Warning: Prefix caching is currently enabled. "
                    "Its support for Mamba2 layers is experimental. "