From c058ff44d4ccf411ca67abc0ce216571a789c6db Mon Sep 17 00:00:00 2001
From: Yongye Zhu <zyy1102000@gmail.com>
Date: Sun, 22 Mar 2026 15:20:13 -0400
Subject: [PATCH] [Bugfix] Fix LoRA test by passing padded size back to the layer
 (#37811)

---
 vllm/model_executor/layers/quantization/mxfp4.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 22077be8a..751ee6dfd 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -154,6 +154,15 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
         )
         self.hidden_size = hidden_size = self.moe.hidden_dim
 
+        # Expose padded dimensions on the layer for LoRA and Marlin code
+        # that reads layer.hidden_size / layer.intermediate_size_per_partition.
+        layer.params_dtype = params_dtype
+        layer.num_experts = num_experts
+        layer.hidden_size = hidden_size
+        layer.intermediate_size_per_partition = (
+            intermediate_size_per_partition_after_pad
+        )
+
         # Fused gate_up_proj (column parallel)
         w13_weight = torch.nn.Parameter(
             torch.zeros(