From 4a76ad12e00109f3bb06ccbd7089fc3612564e31 Mon Sep 17 00:00:00 2001 From: RobTand Date: Wed, 25 Mar 2026 11:18:25 -0400 Subject: [PATCH] =?UTF-8?q?[Bugfix]=20Preserve=20CUDA=20arch=20suffix=20(a?= =?UTF-8?q?/f)=20for=20SM12x=20=E2=80=94=20fixes=20NVFP4=20NaN=20on=20desk?= =?UTF-8?q?top=20Blackwell=20(#37725)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rob Tand Co-authored-by: Lucas Wilkinson --- CMakeLists.txt | 4 ++-- cmake/utils.cmake | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e438ff41d..202eb2b4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,10 +94,10 @@ find_package(Torch REQUIRED) # This check must happen after find_package(Torch) because that's when CMAKE_CUDA_COMPILER_VERSION gets defined if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0") + set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0;12.1") elseif(DEFINED CMAKE_CUDA_COMPILER_VERSION AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) - set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0") + set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0;12.1") else() set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0") endif() diff --git a/cmake/utils.cmake b/cmake/utils.cmake index bdb2ba74d..fd3d7e0ae 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -173,8 +173,10 @@ print(candidates[0] if candidates else '') endfunction() # Macro for converting a `gencode` version number to a cmake version number. +# Preserves architecture-specific suffixes (a/f) needed for correct +# __CUDA_ARCH_FAMILY_SPECIFIC__ definition. E.g. "121a" -> "12.1a". macro(string_to_ver OUT_VER IN_STR) - string(REGEX REPLACE "\([0-9]+\)\([0-9]\)" "\\1.\\2" ${OUT_VER} ${IN_STR}) + string(REGEX REPLACE "\([0-9]+\)\([0-9][af]?\)" "\\1.\\2" ${OUT_VER} ${IN_STR}) endmacro() # @@ -211,7 +213,7 @@ endmacro() function(extract_unique_cuda_archs_ascending OUT_ARCHES CUDA_ARCH_FLAGS) set(_CUDA_ARCHES) foreach(_ARCH ${CUDA_ARCH_FLAGS}) - string(REGEX MATCH "arch=compute_\([0-9]+a?\)" _COMPUTE ${_ARCH}) + string(REGEX MATCH "arch=compute_\([0-9]+[af]?\)" _COMPUTE ${_ARCH}) if (_COMPUTE) set(_COMPUTE ${CMAKE_MATCH_1}) endif()