[Hybrid] Added supports_mamba_prefix_caching Protocol (#27339)

Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
2025-10-27 15:05:20 +02:00
parent f4e8154076
commit 9273754222
10 changed files with 93 additions and 20 deletions
--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -62,6 +62,7 @@ from vllm.model_executor.models.interfaces import (
    IsHybrid,
    MixtureOfExperts,
    SupportsLoRA,
+    SupportsMambaPrefixCaching,
    SupportsPP,
    SupportsQuant,
 )
@@ -695,6 +696,7 @@ class NemotronHForCausalLM(
    IsHybrid,
    SupportsQuant,
    MixtureOfExperts,
+    SupportsMambaPrefixCaching,
 ):
    hf_to_vllm_mapper = WeightsMapper(
        orig_to_new_prefix={"backbone": "model"},