diff --git a/tests/unit/test_d1_hd512_only.py b/tests/unit/test_d1_hd512_only.py index 6ef04c1f..6a8e314f 100644 --- a/tests/unit/test_d1_hd512_only.py +++ b/tests/unit/test_d1_hd512_only.py @@ -47,9 +47,9 @@ def test(): t0 = time.time() from cutlass.base_dsl.compiler import CompileOptions, PtxasOptions, OptLevel # PtxasOptions -j64: use 64 threads for ptxas register allocation (B200 has 256 cores) - # OptLevel(0): skip MLIR optimizations for faster compilation (first verify correctness, then optimize) - compile_opts = CompileOptions((PtxasOptions("-j64"), OptLevel(0))) - compiled = cute.compile(kernel, mQ, mK, mV, mC, stream, mLSE, config=compile_opts) + # OptLevel(0): skip MLIR optimizations for faster compilation (verify correctness first, then optimize) + compiled = cute.compile(kernel, mQ, mK, mV, mC, stream, mLSE, + options="--ptxas-options '-j64' --opt-level 0") t1 = time.time() print(f'Compilation took {t1-t0:.1f}s', flush=True)