Add support for the Qwen3-Next model (a hybrid attention model). (#24526)
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -312,7 +312,8 @@ class MambaModelConfig(VerifyAndUpdateConfig):
     # TODO(tdoublep): remove as full cuda graph support is added
     FCG_NOT_SUPPORTED_MODELS = [
-        "Lfm2ForCausalLM", "MiniMaxText01ForCausalLM"
+        "Lfm2ForCausalLM",
+        "MiniMaxText01ForCausalLM",
     ]

     if (model_config.architecture not in FCG_NOT_SUPPORTED_MODELS
Reference in New Issue
Block a user