diff --git a/README.md b/README.md
index f4a6a60..bb5b00c 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,8 @@ The library also provides some environment variables, which may be useful:
     - `DG_JIT_PRINT_COMPILER_COMMAND`: `0` or `1`, print NVCC compilation command, `0` by default
 - Post optimization
     - `DG_JIT_DISABLE_FFMA_INTERLEAVE`: `0` or `1`, disable FFMA-interleaving optimization, `0` by default
+- Heuristic selection
+    - `DG_PRINT_AUTOTUNE`: `0` or `1`, print selected configs for each shape, `0` by default
 - Testing
     - `DG_NSYS_PROFILING`: `0` or `1`, Nsight-system compatible testing, `0` by default
 
diff --git a/deep_gemm/jit_kernels/tuner.py b/deep_gemm/jit_kernels/tuner.py
index 9a8b6f2..4fc9283 100644
--- a/deep_gemm/jit_kernels/tuner.py
+++ b/deep_gemm/jit_kernels/tuner.py
@@ -74,8 +74,7 @@ class JITTuner:
 
         # Cache the best runtime and return
         if int(os.getenv('DG_JIT_DEBUG', 0)) or int(os.getenv('DG_PRINT_AUTOTUNE', 0)):
-            print(
-                f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
+            print(f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
         self.tuned[signature] = (best_runtime, best_keys)
         return best_runtime, best_keys
 