[CI][torch.compile] Reduce e2e fusion test time (#33293)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: ProExpertProg <luka.govedic@gmail.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
Luka Govedič
2026-02-04 19:09:03 -05:00
committed by GitHub
parent 439afa4eea
commit 4d9513537d
17 changed files with 1068 additions and 821 deletions

View File

@@ -1002,7 +1002,7 @@ def test_vllm_config_explicit_overrides():
assert config.compilation_config.pass_config.fuse_attn_quant is True
# Explicit cudagraph mode override on quantized model at O2
- pass_config = PassConfig(fuse_gemm_comms=True)
+ pass_config = PassConfig(enable_qk_norm_rope_fusion=True)
compilation_config = CompilationConfig(
cudagraph_mode=CUDAGraphMode.NONE, pass_config=pass_config
)
@@ -1012,7 +1012,7 @@ def test_vllm_config_explicit_overrides():
compilation_config=compilation_config,
)
assert config.compilation_config.cudagraph_mode == CUDAGraphMode.NONE
- assert config.compilation_config.pass_config.fuse_gemm_comms is True
+ assert config.compilation_config.pass_config.enable_qk_norm_rope_fusion is True
# Mode should still use default for O2
assert config.compilation_config.mode == CompilationMode.VLLM_COMPILE