[Kernels] Clean up FusedMoeMethodBase and modular kernel setup. Remove extra arguments from modular kernel methods. (#22035)

Signed-off-by: Bill Nell <bnell@redhat.com> Co-authored-by: Michael Goin <mgoin64@gmail.com>
2025-08-15 14:46:00 -04:00
parent 48b01fd4d4
commit 8ad7285ea2
54 changed files with 2010 additions and 1293 deletions
--- a/tests/kernels/moe/test_block_int8.py
+++ b/tests/kernels/moe/test_block_int8.py
@@ -118,13 +118,14 @@ def test_w8a8_block_int8_fused_moe(M, N, K, E, topk, block_size, dtype, seed):
    a = torch.randn((M, K), dtype=dtype) / 10
    score = torch.randn((M, E), dtype=dtype)

-    _, w1, w1_s, _, w2, w2_s = make_test_weights(E,
-                                                 N,
-                                                 K,
-                                                 dtype,
-                                                 torch.int8,
-                                                 per_act_token_quant=False,
-                                                 block_shape=block_size)
+    (_, w1, w1_s, _), (_, w2, w2_s,
+                       _) = make_test_weights(E,
+                                              N,
+                                              K,
+                                              dtype,
+                                              torch.int8,
+                                              per_act_token_quant=False,
+                                              block_shape=block_size)

    # Set the context to avoid lots of warning spam.
    with set_current_vllm_config(vllm_config):