From bc32444b238d2ec3726f599cf3fc67dbaf51a6c6 Mon Sep 17 00:00:00 2001
From: Vel <110626982+Code4me2@users.noreply.github.com>
Date: Fri, 6 Feb 2026 20:28:01 -0800
Subject: [PATCH] [Kernel] Add enable_sm120_or_later for SM121 (DGX Spark) CUTLASS support (#33517)

Signed-off-by: code4me2
---
 csrc/cutlass_extensions/common.hpp                    | 11 +++++++++++
 .../c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh    |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/csrc/cutlass_extensions/common.hpp b/csrc/cutlass_extensions/common.hpp
index 1d5280139..91c215071 100644
--- a/csrc/cutlass_extensions/common.hpp
+++ b/csrc/cutlass_extensions/common.hpp
@@ -152,3 +152,14 @@ struct enable_sm120_only : Kernel {
 #endif
   }
 };
+
+// SM12x family includes SM120 (RTX 5090) and SM121 (DGX Spark GB10)
+template <typename Kernel>
+struct enable_sm120_family : Kernel {
+  template <typename... Args>
+  CUTLASS_DEVICE void operator()(Args&&... args) {
+#if defined __CUDA_ARCH__ && (__CUDA_ARCH__ >= 1200 && __CUDA_ARCH__ < 1300)
+    Kernel::operator()(std::forward<Args>(args)...);
+#endif
+  }
+};
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
index 811741aee..f255b27a1 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
+++ b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
@@ -103,7 +103,8 @@ struct cutlass_3x_gemm_fp8_blockwise {
       MainloopScheduler
     >::CollectiveOp;
 
-  using KernelType = enable_sm120_only<cutlass::gemm::kernel::GemmUniversal<
+  // SM12x family to support both SM120 (RTX 5090) and SM121 (DGX Spark)
+  using KernelType = enable_sm120_family<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue>>;
 
   struct GemmKernel : public KernelType {};