Add support for Mistral Large 3 inference with Flashinfer MoE (#33174)

Signed-off-by: Dimitrios Bariamis <12195802+dbari@users.noreply.github.com>
Co-authored-by: Dimitrios Bariamis <12195802+dbari@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
Authored by Dimitrios Bariamis on 2026-01-31 07:48:27 +01:00; committed by GitHub.
parent 73419abfae
commit f0bca83ee4
16 changed files with 1104 additions and 31 deletions


@@ -135,6 +135,8 @@ class TestData:
     layer.w2_input_scale,
 )
 layer.custom_routing_function = Llama4MoE.custom_routing_function
+layer.routing_method_type = RoutingMethodType.Llama4
+layer.renormalize = False
 layer.intermediate_size_per_partition = n
 layer.ep_rank = 0
 layer.local_num_experts = e
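The hunk above wires Llama4-style routing metadata onto a MoE layer in the test setup. A minimal self-contained sketch of that configuration pattern is shown below; `MoELayerConfig`, `configure_llama4_routing`, and the reduced `RoutingMethodType` enum are hypothetical stand-ins for illustration (the real enum lives in FlashInfer and the real layer object in vLLM's fused MoE code), not the actual vLLM API.

```python
from dataclasses import dataclass
from enum import Enum, auto

# Hypothetical stand-in for FlashInfer's RoutingMethodType enum (subset only).
class RoutingMethodType(Enum):
    Default = auto()
    Llama4 = auto()

@dataclass
class MoELayerConfig:
    """Hypothetical container mirroring the attributes assigned in the diff."""
    routing_method_type: RoutingMethodType = RoutingMethodType.Default
    renormalize: bool = True
    intermediate_size_per_partition: int = 0
    ep_rank: int = 0
    local_num_experts: int = 0

def configure_llama4_routing(layer: MoELayerConfig, n: int, e: int) -> MoELayerConfig:
    # Mirror the diff's assignments: select Llama4 routing and disable
    # softmax renormalization of the top-k weights, then record the
    # per-partition intermediate size and local expert count.
    layer.routing_method_type = RoutingMethodType.Llama4
    layer.renormalize = False
    layer.intermediate_size_per_partition = n
    layer.ep_rank = 0
    layer.local_num_experts = e
    return layer

layer = configure_llama4_routing(MoELayerConfig(), n=2048, e=16)
```

The point of the pattern is that the routing method and `renormalize` flag must be set consistently: Llama4 routing supplies its own `custom_routing_function`, so the generic renormalization step is turned off.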