From d14962f0727be82a941ab89a5da0f5f53f3f8c36 Mon Sep 17 00:00:00 2001 From: Chenggang Zhao Date: Thu, 3 Apr 2025 15:53:29 +0800 Subject: [PATCH] Add `DG_NVCC_OVERRIDE_CPP_STANDARD` --- README.md | 1 + deep_gemm/jit/compiler.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6266863..a55311e 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ The library also provides some environment variables, which may be useful: - `DG_CACHE_DIR`: string, the cache directory to store compiled kernels, `$HOME/.deep_gemm` by default - `DG_NVCC_COMPILER`: string, specified NVCC compiler path; will find in `from torch.utils.cpp_extension.CUDA_HOME` by default +- `DG_NVCC_OVERRIDE_CPP_STANDARD`: integer (e.g., `20`), overrides the C++ standard passed to NVCC as `-std=c++<value>` (`20` by default); intended for compatibility with some older GCC versions - `DG_DISABLE_FFMA_INTERLEAVE`: 0 or 1, disable FFMA-interleaving optimization - `DG_PTXAS_VERBOSE`: 0 or 1, show detailed PTXAS compiler output - `DG_PRINT_REG_REUSE`: 0 or 1, print FFMA-interleaving details diff --git a/deep_gemm/jit/compiler.py b/deep_gemm/jit/compiler.py index fec2eb9..3cf20e3 100644 --- a/deep_gemm/jit/compiler.py +++ b/deep_gemm/jit/compiler.py @@ -96,7 +96,8 @@ def put(path, data, is_binary=False): def build(name: str, arg_defs: tuple, code: str) -> Runtime: # Compiler flags - nvcc_flags = ['-std=c++20', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda', + cpp_standard = int(os.getenv('DG_NVCC_OVERRIDE_CPP_STANDARD', 20)) + nvcc_flags = [f'-std=c++{cpp_standard}', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda', '-gencode=arch=compute_90a,code=sm_90a', '--ptxas-options=--register-usage-level=10' + (',--verbose' if 'DG_PTXAS_VERBOSE' in os.environ else ''), # Suppress some unnecessary warnings, such as unused variables for certain `constexpr` branch cases