[CI][torch.compile] Reduce e2e fusion test time (#33293)
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: ProExpertProg <luka.govedic@gmail.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
@@ -1002,7 +1002,7 @@ def test_vllm_config_explicit_overrides():
|
||||
assert config.compilation_config.pass_config.fuse_attn_quant is True
|
||||
|
||||
# Explicit cudagraph mode override on quantized model at O2
|
||||
pass_config = PassConfig(fuse_gemm_comms=True)
|
||||
pass_config = PassConfig(enable_qk_norm_rope_fusion=True)
|
||||
compilation_config = CompilationConfig(
|
||||
cudagraph_mode=CUDAGraphMode.NONE, pass_config=pass_config
|
||||
)
|
||||
@@ -1012,7 +1012,7 @@ def test_vllm_config_explicit_overrides():
|
||||
compilation_config=compilation_config,
|
||||
)
|
||||
assert config.compilation_config.cudagraph_mode == CUDAGraphMode.NONE
|
||||
assert config.compilation_config.pass_config.fuse_gemm_comms is True
|
||||
assert config.compilation_config.pass_config.enable_qk_norm_rope_fusion is True
|
||||
# Mode should still use default for O2
|
||||
assert config.compilation_config.mode == CompilationMode.VLLM_COMPILE
|
||||
|
||||
|
||||
Reference in New Issue
Block a user