diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 411d11e5a..92795188c 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -363,7 +363,6 @@ class Attention(nn.Module, AttentionLayerBase):
         output_shape = torch.Size(
             (num_tokens, self.num_heads * self.head_size_v)
         )
-        output_shape = output_shape if output_shape is not None else query.shape
         output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
         hidden_size = output_shape[-1]
         # Reshape the query, key, and value tensors.