Signed-off-by: Roi Koren <roik@nvidia.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -148,10 +148,12 @@ class NemotronHMoE(nn.Module):
|
||||
|
||||
self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe
|
||||
|
||||
+ router_logits_dtype = torch.float32
|
||||
self.gate = ReplicatedLinear(
|
||||
config.hidden_size,
|
||||
config.n_routed_experts,
|
||||
bias=False,
|
||||
+ params_dtype=router_logits_dtype,
|
||||
quant_config=None,
|
||||
prefix=f"{prefix}.gate",
|
||||
)
|
||||
@@ -230,6 +232,7 @@ class NemotronHMoE(nn.Module):
|
||||
enable_eplb=self.enable_eplb,
|
||||
num_redundant_experts=self.n_redundant_experts,
|
||||
is_sequence_parallel=self.is_sequence_parallel,
|
||||
+ router_logits_dtype=router_logits_dtype,
|
||||
routed_input_transform=self.fc1_latent_proj,
|
||||
)
|
||||
|
||||
@@ -241,7 +244,7 @@ class NemotronHMoE(nn.Module):
|
||||
hidden_states = sequence_parallel_chunk(hidden_states)
|
||||
|
||||
# router_logits: (num_tokens, n_experts)
|
||||
- router_logits, _ = self.gate(hidden_states)
|
||||
+ router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
|
||||
|
||||
# SharedFusedMoE handles:
|
||||
# - shared experts (with original hidden_states)
|
||||
|
||||
Reference in New Issue
Block a user