[V1] [Hybrid] Support Minimax-Text-01 in V1 (#22151)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Thomas Parnell
2025-08-09 08:08:48 +02:00
committed by GitHub
parent 3157aebb63
commit 6ade99eafa
5 changed files with 234 additions and 42 deletions

View File

@@ -5,6 +5,17 @@ from vllm.distributed import divide
class MambaStateShapeCalculator:
@classmethod
def linear_attention_state_shape(
cls,
num_heads: int,
tp_size: int,
head_dim: int,
) -> tuple[tuple[int, int, int], ...]:
state_shape = (num_heads // tp_size, head_dim, head_dim)
return (state_shape, )
@classmethod
def mamba1_state_shape(
cls,