[CI] Reduce Blackwell Fusion test runtime by filtering tests and only run all tests in nightly (#28074)
This commit is contained in:
@@ -54,11 +54,11 @@ if current_platform.is_cuda():
|
||||
|
||||
MODELS_FP4 = [
|
||||
ModelBackendTestCase(
|
||||
model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP4",
|
||||
model_name="nvidia/Llama-3.1-8B-Instruct-FP4",
|
||||
model_kwargs=dict(max_model_len=1024, kv_cache_dtype="fp8"),
|
||||
backend=_Backend.FLASHINFER,
|
||||
attention_fusions=48,
|
||||
allreduce_fusions=96,
|
||||
attention_fusions=32,
|
||||
allreduce_fusions=65,
|
||||
),
|
||||
]
|
||||
|
||||
@@ -95,8 +95,7 @@ elif current_platform.is_rocm():
|
||||
),
|
||||
]
|
||||
|
||||
# TODO(luka) test both in nightly
|
||||
CUSTOM_OPS_FP8 = ["-quant_fp8"] # , "+quant_fp8"]
|
||||
CUSTOM_OPS_FP8 = ["-quant_fp8", "+quant_fp8"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -171,8 +170,7 @@ def test_attn_quant(
|
||||
assert int(matches[0]) == attention_fusions
|
||||
|
||||
|
||||
# TODO(luka) test both in nightly
|
||||
CUSTOM_OPS_RMS_NORM = ["-rms_norm"] # , "+rms_norm"]
|
||||
CUSTOM_OPS_RMS_NORM = ["-rms_norm", "+rms_norm"]
|
||||
|
||||
|
||||
def custom_ops_product(*custom_ops_lists: list[str]) -> Iterable[str]:
|
||||
|
||||
Reference in New Issue
Block a user