[BugFix] Support online dense model DP without overhead (#30739)

Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: njhill <nickhill123@gmail.com>
Author: Nick Hill
Date: 2026-01-02 07:36:38 -08:00
Committed by: GitHub
Parent: 08f425bad1
Commit: bd877162eb
20 changed files with 345 additions and 146 deletions

@@ -642,7 +642,7 @@ class ModelConfig:
         cls = "Transformers"
         # If 'hf_config != hf_text_config' it's a nested config, i.e. multimodal
         cls += "MultiModal" if self.hf_config != self.hf_text_config else ""
-        cls += "MoE" if self.get_num_experts() > 1 else ""
+        cls += "MoE" if self.is_moe else ""
         # Check if the architecture we're wrapping has defaults
         runner = None
         task = None
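
For context, here is a minimal sketch (not vLLM's actual code; the `_SimpleConfig` class and `backend_cls_name` helper are invented for illustration) of how the Transformers backend class name is assembled from suffixes, with the MoE suffix now driven by the boolean `is_moe` property rather than a raw expert count:

from dataclasses import dataclass


@dataclass
class _SimpleConfig:
    # Stand-ins for the real config fields referenced in the diff above.
    is_multimodal: bool  # corresponds to `hf_config != hf_text_config`
    is_moe: bool         # corresponds to the new `ModelConfig.is_moe` property


def backend_cls_name(cfg: _SimpleConfig) -> str:
    """Build the Transformers backend class name from suffix flags."""
    cls = "Transformers"
    cls += "MultiModal" if cfg.is_multimodal else ""
    cls += "MoE" if cfg.is_moe else ""
    return cls


# A dense text-only model keeps the plain name; a multimodal MoE model gets both suffixes.
assert backend_cls_name(_SimpleConfig(False, False)) == "Transformers"
assert backend_cls_name(_SimpleConfig(True, True)) == "TransformersMultiModalMoE"
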
@@ -1001,8 +1001,7 @@ class ModelConfig:
             self.enforce_eager = True
 
     def _verify_with_expert_parallelism(self) -> None:
-        num_experts = self.get_num_experts()
-        if num_experts < 1:
+        if not self.is_moe:
             raise ValueError(
                 "Number of experts in the model must be greater than 0 "
                 "when expert parallelism is enabled."
@@ -1797,11 +1796,11 @@ class ModelConfig:
         logger.debug("Generative models support prefix caching.")
         return True
 
-    def is_model_moe(
-        self,
-    ) -> bool:
-        return self.get_num_experts() > 1
+    @property
+    def is_moe(self) -> bool:
+        return self.get_num_experts() > 0
 
     @property
     def is_quantized(self) -> bool:
         return getattr(self.hf_config, "quantization_config", None) is not None
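
The behavioral difference between the removed `is_model_moe()` method and the new `is_moe` property is the threshold: a model reporting exactly one expert now counts as MoE. A sketch of that edge case, using a toy `Cfg` class rather than the real ModelConfig:

class Cfg:
    """Toy config exposing only the expert count."""

    def __init__(self, num_experts: int) -> None:
        self._num_experts = num_experts

    def get_num_experts(self) -> int:
        return self._num_experts

    def is_model_moe(self) -> bool:        # old check (removed above)
        return self.get_num_experts() > 1

    @property
    def is_moe(self) -> bool:              # new check (added above)
        return self.get_num_experts() > 0


single_expert = Cfg(num_experts=1)
print(single_expert.is_model_moe())  # False: old check treated it as dense
print(single_expert.is_moe)          # True:  new check treats it as MoE
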