From f04d604567924652e7b4533bbcd5e0c358d578e5 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Tue, 17 Jun 2025 23:59:27 -0700
Subject: [PATCH] [Minor] Zero-initialize attn output buffer (#19784)

Signed-off-by: Woosuk Kwon
---
 vllm/attention/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 6d9c6f51b..f7d230c5d 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -209,7 +209,7 @@ class Attention(nn.Module):
         if self.use_output:
             output_shape = (output_shape
                             if output_shape is not None else query.shape)
-            output = torch.empty(output_shape,
+            output = torch.zeros(output_shape,
                                  dtype=query.dtype,
                                  device=query.device)
             hidden_size = output_shape[-1]