[Hybrid] Added supports_mamba_prefix_caching Protocol (#27339)

Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
This commit is contained in:
Asaf Joseph Gardin
2025-10-27 15:05:20 +02:00
committed by GitHub
parent f4e8154076
commit 9273754222
10 changed files with 93 additions and 20 deletions

View File

@@ -295,17 +295,8 @@ class MambaModelConfig(VerifyAndUpdateConfig):
# overridden by prefix caching logic later)
cache_config.mamba_block_size = model_config.max_model_len
# TODO(@tdoublep) find a better way to do this than whitelist
MAMBA2_MODELS = [
"BambaForCausalLM",
"FalconH1ForCausalLM",
"GraniteMoeHybridForCausalLM",
"Mamba2ForCausalLM",
"NemotronHForCausalLM",
"Zamba2ForCausalLM",
]
if cache_config.enable_prefix_caching:
if model_config.architecture in MAMBA2_MODELS:
if model_config.supports_mamba_prefix_caching:
logger.info(
"Warning: Prefix caching is currently enabled. "
"Its support for Mamba2 layers is experimental. "