[XPU] Quick fix for TritonMLA to remove cuda hardcode (#39088)
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -222,7 +222,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
|
||||
else:
|
||||
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
|
||||
elif current_platform.is_xpu():
|
||||
elif self.unquantized_backend == UnquantizedMoeBackend.XPU:
|
||||
w13 = layer.w13_weight
|
||||
w2 = layer.w2_weight
|
||||
|
||||
|
||||
Reference in New Issue
Block a user