[Models] Replace all nn.Conv2d with vLLM's Conv2dLayer (#28842)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2025-11-19 02:56:04 +08:00
committed by GitHub
parent c64c0b78de
commit e4bb2684bc
20 changed files with 83 additions and 45 deletions

View File

@@ -19,6 +19,7 @@ import torch.nn.functional as F
 from transformers import CLIPVisionConfig
 from vllm.attention.layer import MultiHeadAttention
+from vllm.model_executor.layers.conv import Conv2dLayer
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -133,14 +134,14 @@ class ImageEncoderViT(nn.Module):
             self.blocks.append(block)
         self.neck = nn.Sequential(
-            nn.Conv2d(
+            Conv2dLayer(
                 embed_dim,
                 out_chans,
                 kernel_size=1,
                 bias=False,
             ),
             LayerNorm2d(out_chans),
-            nn.Conv2d(
+            Conv2dLayer(
                 out_chans,
                 out_chans,
                 kernel_size=3,
@@ -150,8 +151,10 @@ class ImageEncoderViT(nn.Module):
             LayerNorm2d(out_chans),
         )
-        self.net_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False)
-        self.net_3 = nn.Conv2d(
+        self.net_2 = Conv2dLayer(
+            256, 512, kernel_size=3, stride=2, padding=1, bias=False
+        )
+        self.net_3 = Conv2dLayer(
             512, 1024, kernel_size=3, stride=2, padding=1, bias=False
         )
@@ -500,7 +503,7 @@ class PatchEmbed(nn.Module):
         """
         super().__init__()
-        self.proj = nn.Conv2d(
+        self.proj = Conv2dLayer(
             in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding
         )