[MM Encoder]: Migrate legacy ViT MultiHeadAttention to new MMEncoderAttention interface (#30684)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2025-12-19 02:04:19 +08:00
committed by GitHub
parent 62be3670cb
commit 700a5ad6c6
20 changed files with 182 additions and 266 deletions

View File

@@ -19,7 +19,7 @@ from transformers import BatchFeature, PreTrainedTokenizer, TensorType
from transformers.image_utils import ImageInput
from transformers.tokenization_utils_base import TextInput
-from vllm.attention.layer import MultiHeadAttention
+from vllm.attention.layers.mm_encoder_attention import MMEncoderAttention
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.distributed import get_tensor_model_parallel_world_size
@@ -135,7 +135,7 @@ class EVA2CLIPAttention(nn.Module):
prefix=f"{prefix}.dense",
)
-        self.attn = MultiHeadAttention(
+        self.attn = MMEncoderAttention(
self.num_heads_per_rank, self.head_dim, self.scale
)
self.output_dropout = torch.nn.Dropout(config.dropout_prob)