moe_forward: invoke the runners through .run()

Replaces the direct calls moe_runner(x, topk_w, topk_ids) and se_runner(x)
with moe_runner.run(x, topk_w, topk_ids) and se_runner.run(x), and puts the
two assignments on separate lines instead of joining them with ';'.

NOTE(review): this patch was recovered from a copy whose lines had been
collapsed together. The line breaks follow the hunk counts (-380,7 +380,8);
the 4-space indentation and the blank context line are assumed -- confirm
against single_shot_inference.py before applying.

diff --git a/single_shot_inference.py b/single_shot_inference.py
index 614dfc32..de1ea9a6 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -380,7 +380,8 @@ def forward_attention(x_normed, w, li, cfg, rope_cos, rope_sin,
 # =====================================================================
 def moe_forward(x, li, moe_runner, se_runner, router, token_id):
     topk_w, topk_ids = router(x, token_ids=token_id)
-    routed_out = moe_runner(x, topk_w, topk_ids); shared_out = se_runner(x)
+    routed_out = moe_runner.run(x, topk_w, topk_ids)
+    shared_out = se_runner.run(x)
     return routed_out + shared_out
 
 # =====================================================================