[Hybrid] Added supports_mamba_prefix_caching Protocol (#27339)

Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
This commit is contained in:
Asaf Joseph Gardin
2025-10-27 15:05:20 +02:00
committed by GitHub
parent f4e8154076
commit 9273754222
10 changed files with 93 additions and 20 deletions

View File

@@ -62,6 +62,7 @@ from vllm.model_executor.models.interfaces import (
IsHybrid,
MixtureOfExperts,
SupportsLoRA,
SupportsMambaPrefixCaching,
SupportsPP,
SupportsQuant,
)
@@ -695,6 +696,7 @@ class NemotronHForCausalLM(
IsHybrid,
SupportsQuant,
MixtureOfExperts,
SupportsMambaPrefixCaching,
):
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={"backbone": "model"},