[Refactor] Move FusedMoE hidden_size roundup to quant_method (#34285)

Signed-off-by: Bowen Bao <bowenbao@amd.com>
2026-03-26 23:38:26 -07:00
parent c2b17d71af
commit 0ae89f18fd
12 changed files with 204 additions and 222 deletions
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
@@ -439,7 +439,6 @@ def prepare_fp8_moe_layer_for_fi(
            layer.moe_config.is_act_and_mul,
            min_alignment,
        )
-        layer.intermediate_size_per_partition = new_intermediate
        layer.moe_config.intermediate_size_per_partition = new_intermediate

    # FI kernels require W31 layout rather than W13.