diff --git a/vllm/model_executor/layers/layernorm.py b/vllm/model_executor/layers/layernorm.py index 17b90c970..ff78f0886 100644 --- a/vllm/model_executor/layers/layernorm.py +++ b/vllm/model_executor/layers/layernorm.py @@ -577,7 +577,7 @@ class RMSNormGated(CustomOp): if z is not None and self.norm_before_gate: out = out * F.silu(z) - return out + return out.to(x.dtype) def forward_cuda( self, x: torch.Tensor, z: torch.Tensor | None = None