[Model] Support MiniCPM-o 4.5 (#33431)
Signed-off-by: caitianchi <caitianchi@modelbest.cn> Signed-off-by: tc-mb <caitianchi@modelbest.cn> Co-authored-by: mslv <mslv@baai.ac.cn>
This commit is contained in:
@@ -64,6 +64,7 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from .minicpmv import (
|
||||
_MAX_FRAMES_PER_VIDEO,
|
||||
MiniCPMV2_6,
|
||||
MiniCPMV4_5,
|
||||
MiniCPMVDummyInputsBuilder,
|
||||
MiniCPMVMultiModalDataParser,
|
||||
MiniCPMVMultiModalProcessor,
|
||||
@@ -197,7 +198,9 @@ class MiniCPMOProcessingInfo(MiniCPMVProcessingInfo):
|
||||
)
|
||||
|
||||
def get_default_audio_pool_step(self) -> int:
|
||||
return 2
|
||||
hf_config = self.get_hf_config()
|
||||
# MiniCPM-o 4.5 uses pool_step=5, older versions use 2
|
||||
return getattr(hf_config, "audio_pool_step", 2)
|
||||
|
||||
def get_default_audio_sampling_rate(self) -> int:
|
||||
return 16000
|
||||
@@ -521,12 +524,9 @@ class MiniCPMWhisperEncoder(WhisperEncoder):
|
||||
)
|
||||
|
||||
|
||||
@MULTIMODAL_REGISTRY.register_processor(
|
||||
MiniCPMOMultiModalProcessor,
|
||||
info=MiniCPMOProcessingInfo,
|
||||
dummy_inputs=MiniCPMODummyInputsBuilder,
|
||||
)
|
||||
class MiniCPMO(MiniCPMV2_6):
|
||||
class MiniCPMOBaseModel:
|
||||
"""Base mixin class for MiniCPM-O models with audio support."""
|
||||
|
||||
packed_modules_mapping = {
|
||||
"qkv_proj": [
|
||||
"q_proj",
|
||||
@@ -767,3 +767,82 @@ class MiniCPMO(MiniCPMV2_6):
|
||||
multimodal_embeddings += tuple(audio_embeddings)
|
||||
|
||||
return multimodal_embeddings
|
||||
|
||||
|
||||
class MiniCPMO2_6(MiniCPMOBaseModel, MiniCPMV2_6):
|
||||
"""MiniCPM-O 2.6 model with Qwen2 backbone."""
|
||||
|
||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||
|
||||
with self._mark_tower_model(vllm_config, "audio"):
|
||||
self.apm = self.init_audio_module(
|
||||
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "apm")
|
||||
)
|
||||
|
||||
|
||||
class MiniCPMO4_5(MiniCPMOBaseModel, MiniCPMV4_5):
|
||||
"""MiniCPM-O 4.5 model with Qwen3 backbone."""
|
||||
|
||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||
|
||||
with self._mark_tower_model(vllm_config, "audio"):
|
||||
self.apm = self.init_audio_module(
|
||||
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "apm")
|
||||
)
|
||||
|
||||
|
||||
_MINICPMO_SUPPORT_VERSION = {
|
||||
(2, 6): MiniCPMO2_6,
|
||||
(4, 5): MiniCPMO4_5,
|
||||
}
|
||||
|
||||
|
||||
@MULTIMODAL_REGISTRY.register_processor(
|
||||
MiniCPMOMultiModalProcessor,
|
||||
info=MiniCPMOProcessingInfo,
|
||||
dummy_inputs=MiniCPMODummyInputsBuilder,
|
||||
)
|
||||
class MiniCPMO(MiniCPMOBaseModel, MiniCPMV2_6):
|
||||
"""
|
||||
MiniCPM-O model with audio support.
|
||||
Different versions use different LLM backbones:
|
||||
- Version 2.6: Uses Qwen2
|
||||
- Version 4.5: Uses Qwen3
|
||||
"""
|
||||
|
||||
def __new__(cls, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
config = vllm_config.model_config.hf_config
|
||||
|
||||
# Determine version from config
|
||||
if hasattr(config, "version"):
|
||||
try:
|
||||
version_str = str(config.version)
|
||||
version_parts = version_str.split(".")
|
||||
version = tuple(int(x) for x in version_parts[:2])
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"Invalid model version format in config: {config.version}. "
|
||||
"Expected a dot-separated version string like '4.5'."
|
||||
) from e
|
||||
else:
|
||||
# Default to 2.6 for backward compatibility
|
||||
version = (2, 6)
|
||||
|
||||
# Dispatch class based on version
|
||||
instance_cls = _MINICPMO_SUPPORT_VERSION.get(version)
|
||||
if instance_cls is None:
|
||||
supported_versions = ", ".join(
|
||||
[f"{v[0]}.{v[1]}" for v in sorted(_MINICPMO_SUPPORT_VERSION.keys())]
|
||||
)
|
||||
raise ValueError(
|
||||
f"Currently, MiniCPMO only supports versions "
|
||||
f"{supported_versions}. Got version: {version}"
|
||||
)
|
||||
|
||||
return instance_cls(vllm_config=vllm_config, prefix=prefix)
|
||||
|
||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||
# This __init__ won't be called due to __new__ returning a different class
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user