fix: moe_pipeline.py gate/up split — L1 output is 2*intermediate, not intermediate
This commit is contained in:
@@ -199,10 +199,11 @@ def run_nvfp4_moe(
|
||||
# ════════════════════════════════════════════════════════════════
|
||||
# SiLU(gate) * up (BF16 — nonlinear requires BF16)
|
||||
# ════════════════════════════════════════════════════════════════
|
||||
intermediate = l1_out.shape[1]
|
||||
gate = l1_out[:, :intermediate]
|
||||
up = l1_out[:, intermediate:]
|
||||
activated = torch.nn.functional.silu(gate) * up # (num_slots, half) BF16
|
||||
# L1 output is (tokens, 2*intermediate) — gate and up fused
|
||||
intermediate_size = l1_out.shape[1] // 2
|
||||
gate = l1_out[:, :intermediate_size]
|
||||
up = l1_out[:, intermediate_size:]
|
||||
activated = torch.nn.functional.silu(gate) * up # (num_slots, intermediate) BF16
|
||||
|
||||
# ════════════════════════════════════════════════════════════════
|
||||
# L2: down projection (NVFP4 × NVFP4 → BF16)
|
||||
|
||||
Reference in New Issue
Block a user