[Model][QwenVL] Optimize Qwen2_5_VisionAttention q,k preparation (#28769)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Lukas Geiger
2025-11-16 17:37:15 +00:00
committed by GitHub
parent ac1daf3233
commit 5a87076d6e
2 changed files with 25 additions and 27 deletions

View File

@@ -39,8 +39,8 @@ from vllm.model_executor.models.interfaces import (
)
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM
-from vllm.model_executor.models.qwen2_5_vl import Qwen2_5_VisionAttention
from vllm.model_executor.models.qwen2_vl import (
+Qwen2VisionAttention,
Qwen2VLDummyInputsBuilder,
Qwen2VLMultiModalProcessor,
Qwen2VLProcessingInfo,
@@ -328,7 +328,7 @@ class DotsVisionAttention(nn.Module):
# [S, C] -> [S, B=1, C]
x = hidden_states.unsqueeze(1)
x, _ = self.qkv(x)
-q, k, v = Qwen2_5_VisionAttention.split_qkv(self, x)
+q, k, v = Qwen2VisionAttention.split_qkv(self, x)
bs = q.shape[1]
# [S,B,H,D] -> [B,S,H,D]
q = q.permute(1, 0, 2, 3).contiguous()