[CI] Reduce Blackwell Fusion test runtime by filtering tests and running the full test set only in nightly (#28074)

This commit is contained in:
Copilot
2025-11-07 15:58:16 +08:00
committed by GitHub
parent 9da9208b20
commit a736e5ff77
2 changed files with 31 additions and 8 deletions

View File

@@ -54,11 +54,11 @@ if current_platform.is_cuda():
MODELS_FP4 = [
ModelBackendTestCase(
model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP4",
model_name="nvidia/Llama-3.1-8B-Instruct-FP4",
model_kwargs=dict(max_model_len=1024, kv_cache_dtype="fp8"),
backend=_Backend.FLASHINFER,
attention_fusions=48,
allreduce_fusions=96,
attention_fusions=32,
allreduce_fusions=65,
),
]
@@ -95,8 +95,7 @@ elif current_platform.is_rocm():
),
]
# TODO(luka) test both in nightly
CUSTOM_OPS_FP8 = ["-quant_fp8"] # , "+quant_fp8"]
CUSTOM_OPS_FP8 = ["-quant_fp8", "+quant_fp8"]
@pytest.mark.parametrize(
@@ -171,8 +170,7 @@ def test_attn_quant(
assert int(matches[0]) == attention_fusions
# TODO(luka) test both in nightly
CUSTOM_OPS_RMS_NORM = ["-rms_norm"] # , "+rms_norm"]
CUSTOM_OPS_RMS_NORM = ["-rms_norm", "+rms_norm"]
def custom_ops_product(*custom_ops_lists: list[str]) -> Iterable[str]: