diff --git a/tests/evals/gsm8k/configs/Qwen3-30B-A3B-MXFP4A16.yaml b/tests/evals/gsm8k/configs/Qwen3-30B-A3B-MXFP4A16.yaml
new file mode 100644
index 000000000..215edebcb
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-30B-A3B-MXFP4A16.yaml
@@ -0,0 +1,5 @@
+model_name: nm-testing/Qwen3-30B-A3B-MXFP4A16
+accuracy_threshold: 0.88
+num_questions: 1319
+num_fewshot: 5
+server_args: "--enforce-eager --max-model-len 4096"
diff --git a/tests/evals/gsm8k/configs/models-small.txt b/tests/evals/gsm8k/configs/models-small.txt
index 7bce3f000..a6a2f6c64 100644
--- a/tests/evals/gsm8k/configs/models-small.txt
+++ b/tests/evals/gsm8k/configs/models-small.txt
@@ -4,3 +4,4 @@ Llama-3-8B-Instruct-nonuniform-CT.yaml
 Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
 Qwen1.5-MoE-W4A16-CT.yaml
 DeepSeek-V2-Lite-Instruct-FP8.yaml
+Qwen3-30B-A3B-MXFP4A16.yaml
\ No newline at end of file
diff --git a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
index 547a2a795..1efb4d092 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
@@ -10,6 +10,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEQuantConfig,
+    mxfp4_w4a16_moe_quant_config,
     nvfp4_moe_quant_config,
     nvfp4_w4a16_moe_quant_config,
 )
@@ -193,6 +194,16 @@ def convert_to_nvfp4_moe_kernel_format(
     )
 
 
+def make_mxfp4_moe_quant_config(
+    w13_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+) -> FusedMoEQuantConfig:
+    return mxfp4_w4a16_moe_quant_config(
+        w1_scale=w13_scale,
+        w2_scale=w2_scale,
+    )
+
+
 def make_nvfp4_moe_quant_config(
     backend: NvFp4MoeBackend,
     w13_scale: torch.Tensor,
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index b71921c5c..85e73e504 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -52,6 +52,7 @@ from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
     NvFp4MoeBackend,
     convert_to_nvfp4_moe_kernel_format,
     is_global_sf_supported_for_nvfp4_backend,
+    make_mxfp4_moe_quant_config,
     make_nvfp4_moe_kernel,
     make_nvfp4_moe_quant_config,
     select_nvfp4_moe_backend,
@@ -79,6 +80,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils import (
 )
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
     is_fp4_marlin_supported,
+    prepare_moe_fp4_layer_for_marlin,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     convert_bf16_scales_to_fp8,
@@ -145,6 +147,9 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
         input_quant = scheme_dict.get("input_activations")
         format = scheme_dict.get("format")
 
+        if quant_config._is_mxfp4(weight_quant):
+            return CompressedTensorsW4A4Mxfp4MoEMethod(layer.moe_config)
+
         if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
             # group_size=None means channelwise
             group_size = weight_quant.group_size or -1
@@ -224,6 +229,140 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
     )
 
 
+class CompressedTensorsW4A4Mxfp4MoEMethod(CompressedTensorsMoEMethod):
+    def __init__(self, moe):
+        super().__init__(moe)
+        self.group_size = 32
+        self.mxfp4_backend = NvFp4MoeBackend.MARLIN
+        self.kernel: mk.FusedMoEModularKernel | None = None
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.params_dtype = params_dtype
+
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // 2,
+                requires_grad=False,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        w13_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                # one e8m0 scale per group_size elements of the input dimension
+                hidden_size // self.group_size,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # one e8m0 scale per group_size elements of the input dimension
+                intermediate_size_per_partition // self.group_size,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        return make_mxfp4_moe_quant_config(
+            w13_scale=layer.w13_weight_scale, w2_scale=layer.w2_weight_scale
+        )
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        layer.w13_weight = torch.nn.Parameter(
+            layer.w13_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w13_weight_packed")
+
+        layer.w2_weight = torch.nn.Parameter(
+            layer.w2_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w2_weight_packed")
+
+        prepare_moe_fp4_layer_for_marlin(layer)
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config is not None:
+            self.kernel = make_nvfp4_moe_kernel(
+                backend=self.mxfp4_backend,
+                quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+            )
+
+    def apply(
+        self,
+        layer: FusedMoE,
+        router: FusedMoERouter,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        if isinstance(x, tuple):
+            x_routing, _ = x
+        else:
+            x_routing = x
+
+        topk_weights, topk_ids = router.select_experts(
+            hidden_states=x_routing,
+            router_logits=router_logits,
+        )
+        assert self.kernel is not None
+        return self.kernel(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            inplace=False,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+        )
+
+
 class CompressedTensorsW4A4Nvfp4MoEMethod(CompressedTensorsMoEMethod):
     def __init__(
         self,
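
Aside (not part of the patch): a minimal, runnable sketch of the MXFP4 W4A16 tensor layout that create_weights allocates above. E, H, and I are hypothetical sizes chosen for illustration; only the fp4 packing and group-size arithmetic mirrors the PR.

# Standalone illustration with assumed sizes (not values from this diff).
import torch

E, H, I = 4, 256, 128  # hypothetical num_experts, hidden_size, intermediate_size_per_partition
GROUP_SIZE = 32        # MXFP4 block size, matching self.group_size above

# Two fp4 values are packed per uint8 byte along the input dimension.
w13_weight = torch.empty(E, 2 * I, H // 2, dtype=torch.uint8)
w2_weight = torch.empty(E, H, I // 2, dtype=torch.uint8)

# One e8m0 scale (stored as uint8) per GROUP_SIZE elements of the input dimension.
w13_scale = torch.empty(E, 2 * I, H // GROUP_SIZE, dtype=torch.uint8)
w2_scale = torch.empty(E, H, I // GROUP_SIZE, dtype=torch.uint8)

print(w13_weight.shape, w13_scale.shape)  # torch.Size([4, 256, 128]) torch.Size([4, 256, 8])
print(w2_weight.shape, w2_scale.shape)    # torch.Size([4, 256, 64]) torch.Size([4, 256, 4])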