diff --git a/tests/unit/test_d1_hd512_only.py b/tests/unit/test_d1_hd512_only.py index 8b3a4767..7e3c8cb6 100644 --- a/tests/unit/test_d1_hd512_only.py +++ b/tests/unit/test_d1_hd512_only.py @@ -46,11 +46,7 @@ def test(): import time t0 = time.time() from cutlass.base_dsl.compiler import PtxasOptions, OptLevel - # OptLevel(0): skip MLIR optimizations for faster compilation. - # The bottleneck is the MLIR optimizer (not ptxas), so ptxas -j doesn't help. - # Verify correctness first at O0, then re-compile at O3 for production. - compiled = cute.compile(kernel, mQ, mK, mV, mC, stream, mLSE, - options="--opt-level 0") + compiled = cute.compile(kernel, mQ, mK, mV, mC, stream, mLSE) t1 = time.time() print(f'Compilation took {t1-t0:.1f}s', flush=True)