From ae4e280602f3c91d322a449f33f5aebbdd59ccc1 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 10 Feb 2026 02:41:24 -0800 Subject: [PATCH] [Bugfix] Fix FI kernel `chunk_gated_delta_rule` output shape for Qwen3.5 (#34219) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_next.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index de97daccf..d0c13dd49 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -135,7 +135,7 @@ def fi_chunk_gated_delta_rule( fi_state = initial_state.to(torch.float32) fi_g = g.to(torch.float32) fi_beta = beta.to(torch.float32) - return chunk_gated_delta_rule_fi( + output, final_state = chunk_gated_delta_rule_fi( q=q, k=k, v=v, @@ -145,6 +145,8 @@ def fi_chunk_gated_delta_rule( output_final_state=output_final_state, cu_seqlens=cu_seqlens, ) + # Unsqueeze back to 4D (1, L, H, D) to match fla output format + return output.unsqueeze(0), final_state @CustomOp.register("chunk_gated_delta_rule")