[XPU] bump up xpu-kernel v0.1.5, transpose moe weights (#38342)

Signed-off-by: mayuyuace <qiming1.zhang@intel.com>
Signed-off-by: Qiming Zhang <qiming1.zhang@intel.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Qiming Zhang
2026-04-03 07:10:02 -07:00
committed by GitHub
parent 580090db6b
commit 6b4872240f
3 changed files with 17 additions and 1 deletion

View File

@@ -15,4 +15,4 @@ torch==2.10.0+xpu
torchaudio
torchvision
vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.4/vllm_xpu_kernels-0.1.4-cp38-abi3-manylinux_2_28_x86_64.whl
vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.5/vllm_xpu_kernels-0.1.5-cp38-abi3-manylinux_2_28_x86_64.whl

View File

@@ -222,6 +222,18 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
else:
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
elif current_platform.is_xpu():
w13 = layer.w13_weight
w2 = layer.w2_weight
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
self._setup_kernel(
layer=layer,
w13=w13,
w2=w2,
)
else:
self._setup_kernel(
layer=layer,

View File

@@ -1028,6 +1028,10 @@ class Fp8OnlineMoEMethod(Fp8MoEMethod):
layer.w2_weight[expert, :, :]
)
if current_platform.is_xpu():
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
# Shuffle weights to runtime format and setup kernel.
self._setup_kernel(
layer,