[AMD][torch.compile] Enable silu+fp8_quant fusion for rocm (#18082)

Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
Charlie Fu
2025-05-14 00:13:56 -05:00
committed by GitHub
parent 2d912fb66f
commit 7b2f28deba
6 changed files with 14 additions and 9 deletions

View File

@@ -7,6 +7,7 @@ from torch._inductor.pattern_matcher import (PatternMatcherPass, fwd_only,
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.platforms import current_platform
from .vllm_inductor_pass import VllmInductorPass
@@ -41,7 +42,7 @@ def empty_bf16(*args, **kwargs):
def empty_fp8(*args, **kwargs):
    """Allocate an uninitialized FP8 tensor on the ``"cuda"`` device.

    The dtype is obtained from ``current_platform.fp8_dtype()`` instead of
    being hard-coded to ``torch.float8_e4m3fn``, so the tensor uses whatever
    FP8 dtype the active platform reports.

    Args:
        *args: Positional arguments forwarded unchanged to ``torch.empty``.
        **kwargs: Keyword arguments forwarded unchanged to ``torch.empty``.
            ``dtype`` and ``device`` are supplied here, so passing either
            one raises ``TypeError`` (duplicate keyword argument).

    Returns:
        The tensor produced by ``torch.empty``.
    """
    # Resolve the dtype per call; the earlier hard-coded assignment was
    # always overwritten and has been dropped.
    fp8 = current_platform.fp8_dtype()
    return torch.empty(*args, **kwargs, dtype=fp8, device="cuda")