[Models] Replace all nn.Conv2d with vLLM's Conv2dLayer (#28842)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2025-11-19 02:56:04 +08:00
parent c64c0b78de
commit e4bb2684bc
20 changed files with 83 additions and 45 deletions
--- a/vllm/model_executor/models/qwen_vl.py
+++ b/vllm/model_executor/models/qwen_vl.py
@@ -25,6 +25,7 @@ from transformers.tokenization_utils_base import TextInput
 from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.conv import Conv2dLayer
 from vllm.model_executor.layers.linear import (
    ColumnParallelLinear,
    ReplicatedLinear,
@@ -333,7 +334,7 @@ class VisionTransformer(nn.Module):
        patch_height, patch_width = self.patch_size = (patch_size, patch_size)
        self.grid_size = (image_height // patch_height, image_width // patch_width)
        self.output_dim = output_dim
-        self.conv1 = nn.Conv2d(
+        self.conv1 = Conv2dLayer(
            in_channels=3,
            out_channels=width,
            kernel_size=patch_size,