[torch.compile] Speed up MOE handling in forward_context (#33184)
Signed-off-by: Richard Zou <zou3519@gmail.com>
This commit is contained in:
@@ -715,7 +715,7 @@ def test_mixtral_moe(
|
||||
|
||||
# need to override the forward context for unittests, otherwise it assumes
|
||||
# we're running the model forward pass (the model specified in vllm_config)
|
||||
- get_forward_context().remaining_moe_layers = None
|
||||
+ get_forward_context().all_moe_layers = None
|
||||
|
||||
# Run forward passes for both MoE blocks
|
||||
hf_states, _ = hf_moe.forward(hf_inputs)
|
||||
|
||||
Reference in New Issue
Block a user