[model] Add support for openPangu7B-VL (#32449)

Signed-off-by: hujiaxin <524446785@qq.com> Signed-off-by: Emilie1001 <79921183+Emilie1001@users.noreply.github.com> Co-authored-by: Emilie1001 <79921183+Emilie1001@users.noreply.github.com>
2026-01-30 15:54:27 +08:00
parent 9432ed8c7e
commit ba45bedfd1
9 changed files with 1601 additions and 1 deletions
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -705,6 +705,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `Molmo2ForConditionalGeneration` | Molmo2 | T + I<sup>+</sup> / V | `allenai/Molmo2-4B`, `allenai/Molmo2-8B`, `allenai/Molmo2-O-7B` | ✅︎ | ✅︎ |
 | `NVLM_D_Model` | NVLM-D 1.0 | T + I<sup>+</sup> | `nvidia/NVLM-D-72B`, etc. | | ✅︎ |
 | `OpenCUAForConditionalGeneration` | OpenCUA-7B | T + I<sup>E+</sup> | `xlangai/OpenCUA-7B` | ✅︎ | ✅︎ |
+| `OpenPanguVLForConditionalGeneration` | openpangu-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `FreedomIntelligence/openPangu-VL-7B` | ✅︎ | ✅︎ |
 | `Ovis` | Ovis2, Ovis1.6 | T + I<sup>+</sup> | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc. | | ✅︎ |
 | `Ovis2_5` | Ovis2.5 | T + I<sup>+</sup> + V | `AIDC-AI/Ovis2.5-9B`, etc. | | |
 | `PaddleOCRVLForConditionalGeneration` | Paddle-OCR | T + I<sup>+</sup> | `PaddlePaddle/PaddleOCR-VL`, etc. | | |
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -1394,6 +1394,37 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
    )


+# OpenPangu
+def run_openpangu_vl(questions: list[str], modality: str) -> ModelRequestData:
+    """Build the engine arguments and prompts for the openPangu-VL example.
+
+    Args:
+        questions: Questions to ask; one prompt is produced per question.
+        modality: Either ``"image"`` or ``"video"``. Selects the placeholder
+            token embedded in each prompt and the key used for
+            ``limit_mm_per_prompt``.
+
+    Returns:
+        The request data holding the engine arguments and the prompts.
+
+    Raises:
+        ValueError: If ``modality`` is neither ``"image"`` nor ``"video"``.
+    """
+    # Placeholder token used inside the prompt for each supported modality.
+    placeholder_by_modality = {"image": "[unused19]", "video": "[unused32]"}
+    if modality not in placeholder_by_modality:
+        # Fail with a clear message instead of an UnboundLocalError when the
+        # prompt template below is formatted.
+        raise ValueError(
+            f"Unsupported modality for openPangu-VL: {modality!r}. "
+            "Expected 'image' or 'video'."
+        )
+    placeholder = placeholder_by_modality[modality]
+
+    model_name = "FreedomIntelligence/openPangu-VL-7B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=4,
+        trust_remote_code=True,
+        enforce_eager=True,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    # The placeholder is wrapped in [unused18] ... [unused20] and precedes
+    # the question inside the user turn.
+    prompts = [
+        (
+            f"<s>[unused9]系统：[unused10][unused9]用户：[unused18]{placeholder}[unused20]{question}[unused10][unused9]助手："
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # Ovis
 def run_ovis(questions: list[str], modality: str) -> ModelRequestData:
    assert modality == "image"
@@ -2051,6 +2082,7 @@ model_example_map = {
    "molmo2": run_molmo2,
    "nemotron_vl": run_nemotron_vl,
    "NVLM_D": run_nvlm_d,
+    "openpangu_vl": run_openpangu_vl,
    "ovis": run_ovis,
    "ovis2_5": run_ovis2_5,
    "paddleocr_vl": run_paddleocr_vl,
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -765,6 +765,32 @@ def load_nvlm_d(question: str, image_urls: list[str]) -> ModelRequestData:
    )


+# OpenPangu
+def load_openpangu_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    """Build the multi-image request for the openPangu-VL example.
+
+    Args:
+        question: Question placed at the start of the user turn.
+        image_urls: URLs of the images; one placeholder group is appended to
+            the prompt per URL and each URL is fetched with ``fetch_image``.
+
+    Returns:
+        The request data holding the engine arguments, the prompt and the
+        fetched images.
+    """
+    num_images = len(image_urls)
+
+    engine_args = EngineArgs(
+        model="FreedomIntelligence/openPangu-VL-7B",
+        trust_remote_code=True,
+        max_model_len=8192,
+        max_num_seqs=2,
+        enforce_eager=True,
+        limit_mm_per_prompt={"image": num_images},
+    )
+
+    # One "[unused18][unused19][unused20]" group per image, placed right
+    # after the question.
+    image_slots = "[unused18][unused19][unused20]" * num_images
+    prompt = (
+        f"<s>[unused9]系统：[unused10][unused9]用户：{question}{image_slots}"
+        "[unused10][unused9]助手："
+    )
+
+    images = [fetch_image(image_url) for image_url in image_urls]
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=images,
+    )
+
+
 # Ovis
 def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
    model_name = "AIDC-AI/Ovis2-1B"
@@ -1388,6 +1414,7 @@ model_example_map = {
    "mistral3": load_mistral3,
    "molmo2": load_molmo2,
    "NVLM_D": load_nvlm_d,
+    "openpangu_vl": load_openpangu_vl,
    "ovis": load_ovis,
    "ovis2_5": load_ovis2_5,
    "paddleocr_vl": load_paddleocr_vl,
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -873,6 +873,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        "xlangai/OpenCUA-7B", trust_remote_code=True
    ),
+    "OpenPanguVLForConditionalGeneration": _HfExamplesInfo(
+        "FreedomIntelligence/openPangu-VL-7B",
+        trust_remote_code=True,
+        max_model_len=4096,
+        enforce_eager=True,
+    ),
    "Ovis": _HfExamplesInfo(
        "AIDC-AI/Ovis2-1B",
        trust_remote_code=True,
--- a/vllm/model_executor/layers/rotary_embedding/__init__.py
+++ b/vllm/model_executor/layers/rotary_embedding/__init__.py
@@ -15,6 +15,7 @@ from .linear_scaling_rope import LinearScalingRotaryEmbedding
 from .llama3_rope import Llama3RotaryEmbedding
 from .llama4_vision_rope import Llama4VisionRotaryEmbedding
 from .mrope import MRotaryEmbedding
+from .mrope_interleaved import MRotaryEmbeddingInterleaved
 from .ntk_scaling_rope import NTKScalingRotaryEmbedding
 from .phi3_long_rope_scaled_rope import Phi3LongRoPEScaledRotaryEmbedding
 from .xdrope import XDRotaryEmbedding
@@ -283,6 +284,21 @@ def get_rope(
            long_factor,
            **extra_kwargs,
        )
+    elif scaling_type == "openpangu":
+        mrope_interleaved = rope_parameters.get("mrope_interleaved", False)
+        if "mrope_section" in rope_parameters and mrope_interleaved:
+            rotary_emb = MRotaryEmbeddingInterleaved(
+                head_size,
+                rotary_dim,
+                max_position,
+                base,
+                is_neox_style,
+                dtype,
+                mrope_section=rope_parameters["mrope_section"],
+                mrope_interleaved=mrope_interleaved,
+            )
+        else:
+            raise ValueError("Pangu mrope lacks necessary parameters.")
    else:
        raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
    _ROPE_DICT[key] = rotary_emb
--- a/vllm/model_executor/layers/rotary_embedding/mrope_interleaved.py
+++ b/vllm/model_executor/layers/rotary_embedding/mrope_interleaved.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Adapted from vllm/model_executor/layers/rotary_embedding/__init__.py
+# Copyright 2023 The vLLM team.
+#
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from .mrope import MRotaryEmbedding
+
+
+# MRotaryEmbedding with interleaved
+class MRotaryEmbeddingInterleaved(MRotaryEmbedding):
+    """Multimodal rotary embedding with interleaved section assignment.
+
+    Every rotary dimension is assigned to one positional section. The
+    assignment is interleaved across sections (see
+    ``get_mrope_interleaved_id_list``) and stored in ``mrope_dim``.
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+        mrope_section: list[int],
+        mrope_interleaved: bool = True,
+    ) -> None:
+        """Validate the section layout and precompute the interleaved ids.
+
+        Args:
+            head_size: Forwarded unchanged to the parent constructor.
+            rotary_dim: Forwarded unchanged to the parent constructor.
+            max_position_embeddings: Forwarded unchanged to the parent
+                constructor.
+            base: Forwarded unchanged to the parent constructor.
+            is_neox_style: Forwarded unchanged to the parent constructor.
+            dtype: Forwarded unchanged to the parent constructor.
+            mrope_section: Number of rotary dimensions per section; must hold
+                two or three entries that sum to ``rotary_dim // 2``.
+            mrope_interleaved: Must be true; a false value is rejected.
+
+        Raises:
+            ValueError: If ``mrope_section`` is ``None``, its sum differs
+                from ``rotary_dim // 2``, or ``mrope_interleaved`` is false.
+            AssertionError: If ``mrope_section`` has neither two nor three
+                entries.
+        """
+        # NOTE(review): the value is passed through unchanged here; if the
+        # cache is meant to be enlarged for video inputs, that has to happen
+        # in the parent constructor -- confirm against MRotaryEmbedding.
+        self.cache_max_position_num = max_position_embeddings
+        super().__init__(
+            head_size,
+            rotary_dim,
+            self.cache_max_position_num,
+            base,
+            is_neox_style,
+            dtype,
+        )
+
+        self.mrope_section = mrope_section
+        self.mrope_interleaved = mrope_interleaved
+
+        if self.mrope_section is None:
+            raise ValueError("mrope_section cannot be None.")
+        if sum(self.mrope_section) != rotary_dim // 2:
+            raise ValueError("Sum of mrope_section must equal rotary_dim // 2.")
+        if not self.mrope_interleaved:
+            raise ValueError(
+                "mrope_interleaved must be True when mrope_section is provided."
+            )
+
+        # Build the per-dimension section ids for one half of the rotary dims.
+        if len(mrope_section) == 2:
+            # Two sections: the third count is zero, so only ids 0 and 1 are
+            # produced.
+            h_num, w_num = mrope_section
+            section_ids = self.get_mrope_interleaved_id_list(h_num, w_num, 0)
+        elif len(mrope_section) == 3:
+            # Three sections: force_last reserves one slot of id 0 for the
+            # final dimension.
+            t_num, h_num, w_num = mrope_section
+            section_ids = self.get_mrope_interleaved_id_list(
+                t_num, h_num, w_num, force_last=True
+            )
+        else:
+            raise AssertionError(
+                "Cannot support the length of mrope section is not 2 or 3."
+            )
+
+        # The gathered cache row is later chunked into two halves (cos, sin)
+        # in ``forward``; repeating the list gives both halves the same
+        # section assignment.
+        self.mrope_dim = section_ids * 2
+
+        # Not read anywhere in this class; kept so the attribute still exists.
+        self.layer_cache = None
+
+    def _rebuild_pos_emb(
+        self,
+        positions: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Gather the cached cos/sin rows and interleave them by section.
+
+        Args:
+            positions: Position ids. Either 1-D (one position per token,
+                shared by every section) or with a leading axis indexed by
+                section id followed by the token axis.
+
+        Returns:
+            A tuple of the per-token cos/sin tensor and an index tensor
+            ``0 .. num_tokens - 1`` on the device of ``positions``.
+        """
+        cos_sin = self.cos_sin_cache[positions]
+        if positions.ndim == 1:
+            # Every section shares the same position, so the gathered rows
+            # are already final. Indexing axis 0 with a section id here would
+            # select tokens instead of sections.
+            return cos_sin, torch.arange(cos_sin.shape[0], device=positions.device)
+
+        section_ids = self.mrope_dim
+        # Split the last axis into single columns and, for each column, keep
+        # the slice that belongs to the section assigned to that column.
+        columns = cos_sin.split([1] * len(section_ids), dim=-1)
+        cos_sin = torch.cat(
+            [column[section_ids[i]] for i, column in enumerate(columns)],
+            dim=-1,
+        )
+        return cos_sin, torch.arange(cos_sin.shape[0], device=positions.device)
+
+    def _apply_interleaved_rotary(
+        self,
+        states: torch.Tensor,
+        cos: torch.Tensor,
+        sin: torch.Tensor,
+        num_tokens: int,
+    ) -> torch.Tensor:
+        """Rotate the first ``rotary_dim`` features of every head.
+
+        The remaining features of each head are passed through untouched and
+        the result is returned in the original shape of ``states``.
+        """
+        original_shape = states.shape
+        states = states.view(num_tokens, -1, self.head_size)
+        rotated = self.apply_rotary_emb.forward_native(
+            states[..., : self.rotary_dim],
+            cos,
+            sin,
+        )
+        untouched = states[..., self.rotary_dim :]
+        return torch.cat((rotated, untouched), dim=-1).reshape(original_shape)
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        """Apply the interleaved rotary embedding to ``query`` and ``key``.
+
+        Args:
+            positions: Position ids, see ``_rebuild_pos_emb``.
+            query: Query states; returned rotated, in the same shape.
+            key: Optional key states; returned rotated, in the same shape.
+
+        Returns:
+            The rotated query and the rotated key (``None`` when no key was
+            given).
+        """
+        cos_sin, token_index = self._rebuild_pos_emb(positions)
+        cos, sin = cos_sin.chunk(2, dim=-1)
+        num_tokens = token_index.numel()
+
+        query = self._apply_interleaved_rotary(query, cos, sin, num_tokens)
+        # key may be None in some cases, e.g. cross-layer KV sharing
+        if key is not None:
+            key = self._apply_interleaved_rotary(key, cos, sin, num_tokens)
+        return query, key
+
+    @staticmethod
+    def get_mrope_interleaved_id_list(
+        a: int, b: int, c: int, force_last: bool = False
+    ) -> list[int]:
+        """Generate an interleaved list of section ids.
+
+        The list is built greedily: at each step the id is chosen among the
+        ids that still have quota left and differ from the previous id (the
+        latter restriction is dropped when no other id remains). Among the
+        candidates, the one with the lowest fraction of its quota already
+        placed wins; ties go to the smaller id.
+
+        Args:
+            a: Number of entries with id 0.
+            b: Number of entries with id 1.
+            c: Number of entries with id 2.
+            force_last: If true, one of the ``a`` entries is held back and
+                appended as the final element, so the list ends with id 0.
+
+        Returns:
+            List of interleaved section ids.
+        """
+        if force_last:
+            # Hold back one slot of id 0 for the final position.
+            a -= 1
+
+        counts = {0: a, 1: b, 2: c}
+        placed = dict.fromkeys(counts, 0)
+        remaining = dict(counts)
+        seq: list[int] = []
+        last = None
+
+        for _ in range(a + b + c):
+            # Candidates: quota left and different from the previous id.
+            cands = [k for k, left in remaining.items() if left > 0 and k != last]
+            if not cands:
+                # Only the previous id has quota left; allow repeating it.
+                cands = [k for k, left in remaining.items() if left > 0]
+
+            # Candidates always have a positive quota, so the division is
+            # safe and every lookup key exists.
+            best = min(cands, key=lambda k: (placed[k] / counts[k], k))
+
+            seq.append(best)
+            placed[best] += 1
+            remaining[best] -= 1
+            last = best
+
+        if force_last:
+            seq.append(0)
+
+        return seq
--- a/vllm/model_executor/models/openpangu.py
+++ b/vllm/model_executor/models/openpangu.py
@@ -537,10 +537,16 @@ class OpenPanguEmbeddedAttention(nn.Module):
        if is_gguf and config.model_type == "PanguEmbedded":
            is_neox_style = False

+        rope_parameters = config.rope_parameters or {}
+        if rope_parameters is not None and rope_parameters.get(
+            "mrope_interleaved", False
+        ):
+            rope_parameters["rope_type"] = "openpangu"
+
        self.rotary_emb = get_rope(
            self.head_dim,
            max_position=self.max_position_embeddings,
-            rope_parameters=config.rope_parameters,
+            rope_parameters=rope_parameters,
            is_neox_style=is_neox_style,
        )

--- a/vllm/model_executor/models/openpangu_vl.py
+++ b/vllm/model_executor/models/openpangu_vl.py
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -405,6 +405,10 @@ _MULTIMODAL_MODELS = {
    "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
    "Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"),
    "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
+    "OpenPanguVLForConditionalGeneration": (
+        "openpangu_vl",
+        "OpenPanguVLForConditionalGeneration",
+    ),
    "Ovis": ("ovis", "Ovis"),
    "Ovis2_5": ("ovis2_5", "Ovis2_5"),
    "PaddleOCRVLForConditionalGeneration": (