diff --git a/vllm/model_executor/models/qwen3_5.py b/vllm/model_executor/models/qwen3_5.py index d6df7523b..61ff6946c 100644 --- a/vllm/model_executor/models/qwen3_5.py +++ b/vllm/model_executor/models/qwen3_5.py @@ -99,6 +99,7 @@ from .interfaces import ( ) from .qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP from .qwen3_next import ( + ChunkGatedDeltaRule, Qwen3NextAttention, Qwen3NextDecoderLayer, Qwen3NextGatedDeltaNet, @@ -268,6 +269,8 @@ class Qwen3_5GatedDeltaNet(Qwen3NextGatedDeltaNet): prefix=f"{prefix}.out_proj", ) + self.chunk_gated_delta_rule = ChunkGatedDeltaRule() + compilation_config = get_current_vllm_config().compilation_config if prefix in compilation_config.static_forward_context: raise ValueError(f"Duplicate layer name: {prefix}")