[Hybrid] Added supports_mamba_prefix_caching Protocol (#27339)
Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
f4e8154076
commit
9273754222
@@ -295,17 +295,8 @@ class MambaModelConfig(VerifyAndUpdateConfig):
|
||||
# override by prefix caching logic later)
|
||||
cache_config.mamba_block_size = model_config.max_model_len
|
||||
|
||||
# TODO(@tdoublep) find a better way to do this than whitelist
|
||||
MAMBA2_MODELS = [
|
||||
"BambaForCausalLM",
|
||||
"FalconH1ForCausalLM",
|
||||
"GraniteMoeHybridForCausalLM",
|
||||
"Mamba2ForCausalLM",
|
||||
"NemotronHForCausalLM",
|
||||
"Zamba2ForCausalLM",
|
||||
]
|
||||
if cache_config.enable_prefix_caching:
|
||||
if model_config.architecture in MAMBA2_MODELS:
|
||||
if model_config.supports_mamba_prefix_caching:
|
||||
logger.info(
|
||||
"Warning: Prefix caching is currently enabled. "
|
||||
"Its support for Mamba2 layers is experimental. "
|
||||
|
||||
Reference in New Issue
Block a user