diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 411d11e5a..92795188c 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -363,7 +363,6 @@ class Attention(nn.Module, AttentionLayerBase):
         output_shape = torch.Size(
             (num_tokens, self.num_heads * self.head_size_v)
         )
-        output_shape = output_shape if output_shape is not None else query.shape
         output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
         hidden_size = output_shape[-1]
         # Reshape the query, key, and value tensors.