diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 72c4803d0..8ad16955c 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -384,6 +384,7 @@ class Qwen2_5_VisionAttention(nn.Module):
         qk_reshaped = einops.rearrange(
             qk, "b s two head head_dim -> (two b) s head head_dim", two=2
         )
+        qk_reshaped = qk_reshaped.contiguous()
         qk_rotated = self.apply_rotary_emb(
             qk_reshaped,
             rotary_pos_emb_cos,
diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index 93034e8cd..3803cd7b9 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -128,8 +128,8 @@ from .vision import (
 logger = init_logger(__name__)
 
 
-# Official recommended max pixels is 24576 * 32 * 32
-_MAX_FRAMES_PER_VIDEO = 24576
+# Official recommended max frames is 2048
+_MAX_FRAMES_PER_VIDEO = 2048
 
 
 class Qwen3_VisionPatchEmbed(nn.Module):