diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index 37bb568dd..1e06ace63 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -690,9 +690,9 @@ class SiglipMultiheadAttentionPoolingHead(nn.Module): hidden_state = self.mlp(hidden_state) hidden_state += residual - pooled = hidden_state[:, 0] - - return pooled.unsqueeze(1) + # Handled by resolve_visual_encoder_outputs + # return hidden_state[:, 0] + return hidden_state class SiglipVisionTransformer(nn.Module):