diff --git a/requirements/xpu.txt b/requirements/xpu.txt
index f15f0dcd1..050737164 100644
--- a/requirements/xpu.txt
+++ b/requirements/xpu.txt
@@ -15,4 +15,4 @@
 torch==2.10.0+xpu
 torchaudio
 torchvision
-vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.1/vllm_xpu_kernels-0.1.1-cp312-cp312-linux_x86_64.whl
\ No newline at end of file
+vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.2/vllm_xpu_kernels-0.1.2-cp312-cp312-linux_x86_64.whl
diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py
index edf7544b9..dc17af87e 100644
--- a/vllm/model_executor/layers/fused_moe/__init__.py
+++ b/vllm/model_executor/layers/fused_moe/__init__.py
@@ -102,6 +102,7 @@ if HAS_TRITON:
     )
     from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
         XPUExperts,
+        XPUExpertsFp8,
     )

     __all__ += [
@@ -121,6 +122,7 @@ if HAS_TRITON:
         "BatchedDeepGemmExperts",
         "TritonOrDeepGemmExperts",
         "XPUExperts",
+        "XPUExpertsFp8",
     ]
 else:
     # Some model classes directly use the custom ops. Add placeholders
diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
index b94e4637e..3dd32f5af 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
@@ -52,6 +52,7 @@ class Fp8MoeBackend(Enum):
     AITER = "AITER"
     VLLM_CUTLASS = "VLLM_CUTLASS"
     BATCHED_VLLM_CUTLASS = "BATCHED_VLLM_CUTLASS"
+    XPU = "XPU"


 def backend_to_kernel_cls(
@@ -123,6 +124,13 @@ def backend_to_kernel_cls(

         return CutlassBatchedExpertsFp8

+    elif backend == Fp8MoeBackend.XPU:
+        from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
+            XPUExpertsFp8,
+        )
+
+        return XPUExpertsFp8
+
     else:
         raise ValueError(f"Unknown FP8 MoE backend: {backend.value}")

@@ -154,6 +162,7 @@ def select_fp8_moe_backend(
         Fp8MoeBackend.TRITON,
         Fp8MoeBackend.BATCHED_TRITON,
         Fp8MoeBackend.MARLIN,
+        Fp8MoeBackend.XPU,
     ]

     # NOTE(rob): We need to peak into the P/F selection to determine
@@ -393,6 +402,7 @@ def convert_to_fp8_moe_kernel_format(
         Fp8MoeBackend.BATCHED_TRITON,
         Fp8MoeBackend.VLLM_CUTLASS,
         Fp8MoeBackend.BATCHED_VLLM_CUTLASS,
+        Fp8MoeBackend.XPU,
     ]:
         raise ValueError(f"Unsupported FP8 MoE backend: {fp8_backend.value}")

diff --git a/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py b/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
index cfb88f6af..a20679ea6 100644
--- a/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
@@ -4,13 +4,16 @@
 import torch
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
     FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
 )
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceNoOP,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
+    kFp8DynamicTensorSym,
     kFp8StaticTensorSym,
 )
 from vllm.platforms import current_platform
@@ -20,6 +23,21 @@ if current_platform.is_xpu():


 class XPUExperts(mk.FusedMoEPermuteExpertsUnpermute):
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+        max_num_tokens: int | None = None,
+        num_dispatchers: int | None = None,
+    ):
+        super().__init__(
+            moe_config,
+            quant_config,
+            max_num_tokens,
+            num_dispatchers,
+        )
+        self.is_fp8 = False
+
     @property
     def expects_unquantized_inputs(self) -> bool:
         return True
@@ -49,10 +67,10 @@ class XPUExperts(mk.FusedMoEPermuteExpertsUnpermute):
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        # TODO: dispatch based on device.
         SUPPORTED_W_A = [
             (None, None),
             (kFp8StaticTensorSym, None),
+            (kFp8StaticTensorSym, kFp8DynamicTensorSym),
         ]
         return (weight_key, activation_key) in SUPPORTED_W_A

@@ -103,10 +121,10 @@ class XPUExperts(mk.FusedMoEPermuteExpertsUnpermute):
         xpu_fused_moe(
             hidden_states=hidden_states,
             w13=w1,
-            w13_scales=a1q_scale,
+            w13_scales=self.w1_scale,
             w13_bias=self.w1_bias,
             w2=w2,
-            w2_scales=a2_scale,
+            w2_scales=self.w2_scale,
             w2_bias=self.w2_bias,
             topk_weights=topk_weights,
             topk_ids=topk_ids,
@@ -116,5 +134,22 @@ class XPUExperts(mk.FusedMoEPermuteExpertsUnpermute):
             ep_rank=self.moe_config.ep_rank,
             ep_size=self.moe_config.ep_size,
             output=output,
+            is_fp8=self.is_fp8,
         )
-        return
+
+
+class XPUExpertsFp8(XPUExperts):
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+        max_num_tokens: int | None = None,
+        num_dispatchers: int | None = None,
+    ):
+        super().__init__(
+            moe_config,
+            quant_config,
+            max_num_tokens,
+            num_dispatchers,
+        )
+        self.is_fp8 = True
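The `xpu_fused_moe.py` changes reduce to a subclass-with-a-flag pattern: `XPUExpertsFp8` is `XPUExperts` with `is_fp8 = True`, and the new `Fp8MoeBackend.XPU` member in `oracle/fp8.py` maps to that class. Below is a minimal standalone sketch of that dispatch, with the vLLM-specific constructor arguments and the `xpu_fused_moe` kernel call stubbed out; the classes here are illustrative stand-ins, not imports from vLLM.

```python
from enum import Enum


class Fp8MoeBackend(Enum):
    # Only the member relevant to this sketch; the real enum has more backends.
    XPU = "XPU"


class XPUExperts:
    """Stand-in for the XPU expert kernel wrapper (BF16/FP16 path)."""

    def __init__(self) -> None:
        # The real class also takes moe_config / quant_config / max_num_tokens /
        # num_dispatchers; omitted here to keep the sketch self-contained.
        self.is_fp8 = False


class XPUExpertsFp8(XPUExperts):
    """FP8 variant: identical wiring, but the fused kernel runs the FP8 path."""

    def __init__(self) -> None:
        super().__init__()
        self.is_fp8 = True


def backend_to_kernel_cls(backend: Fp8MoeBackend) -> type[XPUExperts]:
    # Mirrors the new elif branch added to oracle/fp8.py.
    if backend == Fp8MoeBackend.XPU:
        return XPUExpertsFp8
    raise ValueError(f"Unknown FP8 MoE backend: {backend.value}")


if __name__ == "__main__":
    kernel_cls = backend_to_kernel_cls(Fp8MoeBackend.XPU)
    experts = kernel_cls()
    # This flag is what the expert kernel forwards to the fused op as is_fp8=...
    print(kernel_cls.__name__, "is_fp8 =", experts.is_fp8)
```

Keeping the FP8 behavior behind a single flag leaves the non-quantized `XPUExperts` path untouched, so the oracle only has to choose a class and the kernel call site stays the same.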