[FEAT] [ROCm]: AITER Fused MOE V1 Support (#16752)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com>
2025-04-25 11:06:50 +08:00
parent 0d6e187e88
commit eef364723c
3 changed files with 302 additions and 130 deletions
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -304,9 +304,9 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
            e_score_correction_bias=e_score_correction_bias)

        return self.fused_experts_func(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
            topk_weights=topk_weights,
            topk_ids=topk_ids,
            inplace=True,