[v1] - Mamba1 Attention Metadata (#21249)
Signed-off-by: asafg <asafg@ai21.com> Co-authored-by: asafg <asafg@ai21.com>
This commit is contained in:
committed by
GitHub
parent
31f09c615f
commit
46a13949d5
@@ -19,7 +19,8 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
||||
from vllm.model_executor.layers.mamba.mamba2_metadata import (
|
||||
Mamba2Metadata, prepare_mamba2_metadata)
|
||||
from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2
|
||||
from vllm.model_executor.layers.mamba.mamba_utils import get_mamba_state_shape
|
||||
from vllm.model_executor.layers.mamba.mamba_utils import (
|
||||
MambaStateShapeCalculator)
|
||||
from vllm.model_executor.layers.quantization.base_config import (
|
||||
QuantizationConfig)
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
@@ -220,7 +221,7 @@ class Mamba2ForCausalLM(nn.Module, HasInnerState, IsAttentionFree):
|
||||
hf_config = vllm_config.model_config.hf_config
|
||||
intermediate_size = hf_config.expand * hf_config.hidden_size
|
||||
|
||||
return get_mamba_state_shape(
|
||||
return MambaStateShapeCalculator.mamba2_state_shape(
|
||||
intermediate_size=intermediate_size,
|
||||
tp_world_size=parallel_config.tensor_parallel_size,
|
||||
n_groups=hf_config.n_groups,
|
||||
|
||||
Reference in New Issue
Block a user