Add support for the Qwen3-Next model (a hybrid attention model). (#24526)
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -312,7 +312,8 @@ class MambaModelConfig(VerifyAndUpdateConfig):
     # TODO(tdoublep): remove as full cuda graph support is added
     FCG_NOT_SUPPORTED_MODELS = [
-        "Lfm2ForCausalLM", "MiniMaxText01ForCausalLM"
+        "Lfm2ForCausalLM",
+        "MiniMaxText01ForCausalLM",
     ]

     if (model_config.architecture not in FCG_NOT_SUPPORTED_MODELS
Reference in New Issue
Block a user