[Bugfix][CI] Move resolving cudagraph_mode before initializing attn_metadata_builder (#27427)

Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com>
This commit is contained in:
fhl2000
2025-10-24 11:31:14 +08:00
committed by GitHub
parent 8dbe0c527f
commit 85fee74b33
3 changed files with 34 additions and 19 deletions

View File

@@ -132,6 +132,9 @@ def test_attn_quant(
mode = CUDAGraphMode.FULL_AND_PIECEWISE
splitting_ops: list[str] | None = None
else:
# FIXME: Llama-4-Scout-17B-16E-Instruct-FP8 + FlashInfer + Blackwell ends up
# at CUDAGraphMode.NONE here because this combination derives an attention
# backend that does not support full cudagraphs.
mode = CUDAGraphMode.FULL_DECODE_ONLY
splitting_ops = []