diff --git a/vllm/patches/fused_moe/experts/cutedsl_moe.py b/vllm/patches/fused_moe/experts/cutedsl_moe.py
index 7580efc6..0f56b3d4 100644
--- a/vllm/patches/fused_moe/experts/cutedsl_moe.py
+++ b/vllm/patches/fused_moe/experts/cutedsl_moe.py
@@ -144,8 +144,16 @@ class CuTeDSLMoEExperts(mk.FusedMoEExpertsModular):
         # We have views into the same memory (l1_fp4, l2_fp4), but the runner
         # will create its own copies in _ensure_stacked. Free the layer refs
         # now so the memory can be reclaimed when the views are no longer held.
-        layer.w13_weight = None
-        layer.w2_weight = None
+        # NOTE: The modular kernel framework reads w1.shape[0] in its outer
+        # apply() before delegating to our expert impl, so we can't set the
+        # weights to None. Instead, replace with a shape-preserving dummy on CPU
+        # to free GPU memory while keeping the shape metadata accessible.
+        layer.w13_weight = torch.empty(
+            num_experts, 2 * intermediate_size, hidden_size // 2,
+            device='cpu', dtype=torch.uint8)
+        layer.w2_weight = torch.empty(
+            num_experts, hidden_size, intermediate_size // 2,
+            device='cpu', dtype=torch.uint8)
         layer.w13_weight_scale = None
         layer.w2_weight_scale = None
         layer.w13_weight_scale_2 = None