[V1] [Hybrid] Enable compile and piecewise CUDA graph for MiniMax-Text models (#22589)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
2025-08-27 19:05:16 +02:00
parent 52883ed084
commit dd58932280
2 changed files with 98 additions and 137 deletions
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -339,6 +339,7 @@ class CompilationConfig:
        "vllm.mamba_mixer2",
        "vllm.mamba_mixer",
        "vllm.short_conv",
+        "vllm.linear_attention",
    ]

    def compute_hash(self) -> str: