[V1] SupportsV0Only protocol for model definitions (#13959)

Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Roger Wang
2025-02-27 17:02:15 -08:00
committed by GitHub
parent 67fc426845
commit 6c85da3a18
19 changed files with 93 additions and 32 deletions

View File

@@ -19,7 +19,8 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.interfaces import (HasInnerState,
IsAttentionFree, SupportsPP)
IsAttentionFree, SupportsPP,
SupportsV0Only)
from vllm.model_executor.models.mamba_cache import (MambaCacheManager,
MambaCacheParams)
from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -155,7 +156,8 @@ class MambaModel(nn.Module):
return hidden_states
class MambaForCausalLM(nn.Module, HasInnerState, IsAttentionFree, SupportsPP):
class MambaForCausalLM(nn.Module, HasInnerState, IsAttentionFree, SupportsPP,
SupportsV0Only):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
config = vllm_config.model_config.hf_config