[Bugfix] Fix LoRA test by passing padded size back to the layer (#37811)

2026-03-22 15:20:13 -04:00
parent ce9b1d76cf
commit c058ff44d4
1 changed files with 9 additions and 0 deletions
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -154,6 +154,15 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
        )
        self.hidden_size = hidden_size = self.moe.hidden_dim
        # Expose padded dimensions on the layer for LoRA and Marlin code
        # that reads layer.hidden_size / layer.intermediate_size_per_partition.
        layer.params_dtype = params_dtype
        layer.num_experts = num_experts
        layer.hidden_size = hidden_size
        layer.intermediate_size_per_partition = (
            intermediate_size_per_partition_after_pad
        )
        # Fused gate_up_proj (column parallel)
        w13_weight = torch.nn.Parameter(
            torch.zeros(