[BugFix] Support EP/DP + EPLB with MTP (#25311)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
This commit is contained in:
Ilya Markov
2025-11-05 16:22:17 +01:00
committed by GitHub
parent 5d16d0fa62
commit e50c454672
27 changed files with 957 additions and 529 deletions

View File

@@ -23,6 +23,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.qwen3_next import (
Qwen3NextDecoderLayer,
Qwen3NextRMSNorm,
QwenNextMixtureOfExperts,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs import Qwen3NextConfig
@@ -226,7 +227,7 @@ class Qwen3NextMultiTokenPredictor(nn.Module):
@support_torch_compile
class Qwen3NextMTP(nn.Module, SupportsPP):
class Qwen3NextMTP(nn.Module, SupportsPP, QwenNextMixtureOfExperts):
packed_modules_mapping = {
"qkv_proj": [
"q_proj",
@@ -265,6 +266,7 @@ class Qwen3NextMTP(nn.Module, SupportsPP):
self.make_empty_intermediate_tensors = (
self.model.make_empty_intermediate_tensors
)
self.set_moe_parameters()
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Return the embeddings for ``input_ids``.

    This is a thin pass-through: the lookup itself is performed by
    ``self.model.get_input_embeddings``, and its result is returned
    unchanged.
    """
    embed_fn = self.model.get_input_embeddings
    return embed_fn(input_ids)