[Model] Enable encoder DP for MiniCPM-V (#23948)

Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
Signed-off-by: Jiangyun Zhu <riverclouds.zhu@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Author: Jiangyun Zhu
Date:   2025-08-30 21:31:26 +08:00 (committed by GitHub)
Parent: 5490d633ce
Commit: 3a6acad431

2 changed files with 30 additions and 15 deletions


@@ -175,7 +175,7 @@ Regardless, you need to set `mm_encoder_tp_mode="data"` in engine arguments to u
 
 Known supported models:
 
 - Llama4 (<gh-pr:18368>)
-- MiniCPM-V-4 (<gh-pr:23327>)
+- MiniCPM-V-2.5 or above (<gh-pr:23327>, <gh-pr:23948>)
 - Qwen2.5-VL (<gh-pr:22742>)
 - Step3 (<gh-pr:22697>)
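
Per the docs change above, encoder DP is opt-in through the engine arguments. A minimal usage sketch, assuming a MiniCPM-V 2.6 checkpoint (the model name is illustrative; any listed model should work):

```python
# Sketch: enable data-parallel vision encoding alongside TP inference.
from vllm import LLM

llm = LLM(
    model="openbmb/MiniCPM-V-2_6",
    tensor_parallel_size=2,      # the language model stays TP-sharded
    mm_encoder_tp_mode="data",   # the vision encoder runs data-parallel
)
```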


@@ -977,6 +977,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
     instantiated.
     """
 
+    supports_encoder_tp_data = True
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
         if modality.startswith("image"):
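
The new class-level flag advertises whether an architecture's vision encoder can run data-parallel. A hedged sketch of how an engine could consult it before honoring the request; the helper below is hypothetical, not vLLM's actual check:

```python
# Hypothetical validation helper (name and placement illustrative).
def verify_encoder_tp_mode(model_cls, multimodal_config) -> None:
    if (multimodal_config.mm_encoder_tp_mode == "data"
            and not getattr(model_cls, "supports_encoder_tp_data", False)):
        raise ValueError(
            f"{model_cls.__name__} does not support encoder data "
            "parallelism; remove mm_encoder_tp_mode='data'.")
```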
@@ -990,6 +992,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
         config = vllm_config.model_config.hf_config
         multimodal_config = vllm_config.model_config.multimodal_config
         quant_config = vllm_config.quant_config
+        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         super().__init__()
         # All MiniCPM-V models disable `tie_word_embeddings` but
         # `PretrainedConfig.tie_word_embeddings` defaults to True; we cannot
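
`self.use_data_parallel` is the per-instance switch derived from the user's engine arguments. Conceptually, encoder DP means each TP rank encodes a strided slice of the images with a fully replicated encoder, instead of all ranks cooperating on every image through sharded weights. An illustrative sketch, not vLLM internals:

```python
# Illustrative only: the batch-splitting idea behind encoder DP.
def encode_images_dp(images, encoder, rank, world_size):
    # Each rank independently encodes every world_size-th image...
    local_feats = [encoder(img) for img in images[rank::world_size]]
    # ...and per-rank features are then re-gathered (an all-gather
    # collective in a real engine) so every rank sees all of them.
    return local_feats
```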
@@ -1237,6 +1240,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
 
 
 class MiniCPMV2_0(MiniCPMVBaseModel):
+    supports_encoder_tp_data = False
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (2, 0)
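
MiniCPM-V 2.0 opts out with an explicit `False`; every other subclass inherits the base default, which is also why the now-redundant explicit `True` on `MiniCPMV4_0` is deleted further down. A quick demonstration of the attribute lookup:

```python
# How the opt-in/opt-out flags in this diff resolve through inheritance.
class Base:
    supports_encoder_tp_data = True

class V4(Base):        # inherits True; no override needed
    pass

class V2_0(Base):      # explicit opt-out, as in the hunk above
    supports_encoder_tp_data = False

assert V4.supports_encoder_tp_data is True
assert V2_0.supports_encoder_tp_data is False
```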
@@ -1351,9 +1356,12 @@ class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA):
         quant_config: Optional[QuantizationConfig],
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
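
The remaining hunks all make the same change: `use_data_parallel` is threaded into the `Idefics2VisionTransformer` constructor so the encoder can build replicated projections instead of TP-sharded ones. A schematic of the per-rank difference, with illustrative shapes rather than the real vLLM layer classes:

```python
# Schematic: per-rank weight shapes under TP sharding vs. replication.
import torch
import torch.nn as nn

hidden, tp_size = 1024, 4

tp_shard = nn.Linear(hidden, hidden // tp_size)  # column-sharded slice
dp_full = nn.Linear(hidden, hidden)              # fully replicated weight

x = torch.randn(2, hidden)
print(tp_shard(x).shape)  # torch.Size([2, 256])  -> partial output per rank
print(dp_full(x).shape)   # torch.Size([2, 1024]) -> complete output per rank
```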
@@ -1441,9 +1449,12 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
@@ -1521,8 +1532,6 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
         ],
     }
 
-    supports_encoder_tp_data = True
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (4, 0)
@@ -1546,9 +1555,12 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
@@ -1652,9 +1664,12 @@ class MiniCPMV4_5(MiniCPMVBaseModel, SupportsLoRA):
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model