[torch.compile] Enable silu_mul_fp8_quant fusion without custom ops enabled (#27146)

Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
2025-10-22 12:22:39 +08:00
parent ceacedc1f9
commit ab3e80042e
4 changed files with 134 additions and 76 deletions
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -80,7 +80,8 @@ class SiluAndMul(CustomOp):
        elif current_platform.is_cpu():
            self._forward_method = self.forward_native

-    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+    @staticmethod
+    def forward_native(x: torch.Tensor) -> torch.Tensor:
        """PyTorch-native implementation equivalent to forward()."""
        d = x.shape[-1] // 2
        return F.silu(x[..., :d]) * x[..., d:]