[BugFix] Support EP/DP + EPLB with MTP (#25311)
Signed-off-by: ilmarkov <markovilya197@gmail.com>
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
@@ -65,6 +65,7 @@ from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import (
+    MixtureOfExperts,
     MultiModalEmbeddings,
     SupportsEagle3,
     SupportsMultiModal,
@@ -723,7 +724,7 @@ class Mllama4DummyInputsBuilder(BaseDummyInputsBuilder[Mllama4ProcessingInfo]):
     dummy_inputs=Mllama4DummyInputsBuilder,
 )
 class Llama4ForConditionalGeneration(
-    nn.Module, SupportsMultiModal, SupportsPP, SupportsEagle3
+    nn.Module, SupportsMultiModal, SupportsPP, MixtureOfExperts, SupportsEagle3
 ):
     merge_by_field_config = True
 
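For context, the MixtureOfExperts interface that the class now advertises boils down to the attributes and hooks touched in the hunks below. The following Protocol is a condensed sketch reconstructed from this diff alone, not a verbatim copy of vLLM's interfaces.py; the name MixtureOfExpertsSketch is invented to make that clear.

from typing import Protocol

import torch


class MixtureOfExpertsSketch(Protocol):
    # Expert accounting that the wrapper mirrors from the inner
    # language model (see the next hunk).
    num_moe_layers: int
    num_expert_groups: int
    num_logical_experts: int
    num_physical_experts: int
    num_local_physical_experts: int
    num_routed_experts: int
    num_shared_experts: int
    num_redundant_experts: int

    def set_eplb_state(
        self,
        expert_load_view: torch.Tensor,
        logical_to_physical_map: torch.Tensor,
        logical_replica_count: torch.Tensor,
    ) -> None: ...

    def update_physical_experts_metadata(
        self, num_physical_experts: int, num_local_physical_experts: int
    ) -> None: ...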
@@ -776,6 +777,17 @@ class Llama4ForConditionalGeneration(
             self.language_model.make_empty_intermediate_tensors
         )
 
+        # Set MoE hyperparameters
+        self.num_expert_groups = 1
+        self.num_logical_experts = self.language_model.num_logical_experts
+        self.num_physical_experts = self.language_model.num_physical_experts
+        self.num_local_physical_experts = self.language_model.num_local_physical_experts
+        self.num_routed_experts = self.language_model.num_routed_experts
+        self.num_shared_experts = self.language_model.num_shared_experts
+        self.num_redundant_experts = self.language_model.num_redundant_experts
+        self.moe_layers = self.language_model.moe_layers
+        self.num_moe_layers = len(self.moe_layers)
+
     def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None:
         """Set which layers should output auxiliary hidden states for EAGLE3."""
         # Delegate to underlying language model (Llama4ForCausalLM)
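Every statistic here is mirrored from the inner Llama4ForCausalLM; num_expert_groups is fixed at 1 since the wrapper exposes a single flat group of routed experts. As a rough sketch of the accounting these fields encode, under the usual EPLB layout (the concrete numbers below are assumptions for illustration, not values read from the model):

# Illustrative EPLB accounting only; the counts below are assumed, not
# taken from a real checkpoint. Redundant replicas inflate the physical
# expert count, which is then sharded evenly across expert-parallel ranks.
num_logical_experts = 16    # experts the router addresses
num_redundant_experts = 2   # extra replicas EPLB may place
ep_size = 2                 # expert-parallel world size (assumed)

num_physical_experts = num_logical_experts + num_redundant_experts  # 18
assert num_physical_experts % ep_size == 0
num_local_physical_experts = num_physical_experts // ep_size        # 9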
@@ -792,6 +804,24 @@ class Llama4ForConditionalGeneration(
         assert hasattr(self.language_model, "get_eagle3_aux_hidden_state_layers")
         return self.language_model.get_eagle3_aux_hidden_state_layers()
 
+    def set_eplb_state(
+        self,
+        expert_load_view: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+    ):
+        self.language_model.set_eplb_state(
+            expert_load_view, logical_to_physical_map, logical_replica_count
+        )
+        self.expert_weights = self.language_model.expert_weights
+
+    def update_physical_experts_metadata(
+        self, num_physical_experts: int, num_local_physical_experts: int
+    ):
+        self.language_model.update_physical_experts_metadata(
+            num_physical_experts, num_local_physical_experts
+        )
+
     def _parse_and_validate_image_input(
         self, **kwargs: object
     ) -> Llama4ImagePatchInputs | None:
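Taken together, the two new methods make the multimodal wrapper a thin pass-through to the MoE-bearing language model, so the EPLB machinery can treat Llama4ForConditionalGeneration like any other MixtureOfExperts model. Below is a minimal, runnable sketch of that delegation pattern; ToyLanguageModel and ToyWrapper are stand-ins invented for illustration (they are not vLLM classes), and the tensor shapes follow a per-layer load view plus logical-to-physical replica maps.

import torch


class ToyLanguageModel:
    """Stand-in for Llama4ForCausalLM, which actually owns the MoE layers."""

    def __init__(self) -> None:
        self.expert_weights = [torch.randn(4, 8)]  # placeholder weight views

    def set_eplb_state(self, expert_load_view, logical_to_physical_map,
                       logical_replica_count) -> None:
        # The real model hands these views down to its MoE layers so the
        # balancer can observe load and remap replicas in place.
        self.expert_load_view = expert_load_view
        self.logical_to_physical_map = logical_to_physical_map
        self.logical_replica_count = logical_replica_count

    def update_physical_experts_metadata(self, num_physical_experts,
                                         num_local_physical_experts) -> None:
        self.num_physical_experts = num_physical_experts
        self.num_local_physical_experts = num_local_physical_experts


class ToyWrapper:
    """Stand-in for the multimodal wrapper: every EPLB hook just delegates."""

    def __init__(self) -> None:
        self.language_model = ToyLanguageModel()

    def set_eplb_state(self, expert_load_view, logical_to_physical_map,
                       logical_replica_count) -> None:
        self.language_model.set_eplb_state(
            expert_load_view, logical_to_physical_map, logical_replica_count
        )
        # Mirror the inner model's weights, exactly as the commit does.
        self.expert_weights = self.language_model.expert_weights

    def update_physical_experts_metadata(self, num_physical_experts,
                                         num_local_physical_experts) -> None:
        self.language_model.update_physical_experts_metadata(
            num_physical_experts, num_local_physical_experts
        )


model = ToyWrapper()
layers, logical, physical, replicas = 2, 4, 6, 2
model.set_eplb_state(
    expert_load_view=torch.zeros(layers, physical),
    logical_to_physical_map=torch.full((layers, logical, replicas), -1),
    logical_replica_count=torch.ones(layers, logical, dtype=torch.int64),
)
model.update_physical_experts_metadata(
    num_physical_experts=physical,
    num_local_physical_experts=physical // 2,  # e.g. expert-parallel size 2
)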