From c058ff44d4ccf411ca67abc0ce216571a789c6db Mon Sep 17 00:00:00 2001 From: Yongye Zhu Date: Sun, 22 Mar 2026 15:20:13 -0400 Subject: [PATCH] [Bugfix] Fix LoRA test by passing padded size back to the layer (#37811) --- vllm/model_executor/layers/quantization/mxfp4.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 22077be8a..751ee6dfd 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -154,6 +154,15 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ) self.hidden_size = hidden_size = self.moe.hidden_dim + # Expose padded dimensions on the layer for LoRA and Marlin code + # that reads layer.hidden_size / layer.intermediate_size_per_partition. + layer.params_dtype = params_dtype + layer.num_experts = num_experts + layer.hidden_size = hidden_size + layer.intermediate_size_per_partition = ( + intermediate_size_per_partition_after_pad + ) + # Fused gate_up_proj (column parallel) w13_weight = torch.nn.Parameter( torch.zeros(