[torch.compile] Speed up MOE handling in forward_context (#33184)

Signed-off-by: Richard Zou <zou3519@gmail.com>
2026-01-27 18:17:54 -05:00
parent 3a6d5cbefd
commit d9aa39a3bb
4 changed files with 22 additions and 18 deletions
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -715,7 +715,7 @@ def test_mixtral_moe(

        # need to override the forward context for unittests, otherwise it assumes
        # we're running the model forward pass (the model specified in vllm_config)
-        get_forward_context().remaining_moe_layers = None
+        get_forward_context().all_moe_layers = None

        # Run forward passes for both MoE blocks
        hf_states, _ = hf_moe.forward(hf_inputs)