fix cutlass_3x_gemm_fp8_blockwise on sm103a (#32224)

Signed-off-by: Siyuan Fu <siyuanf@nvidia.com>
Co-authored-by: Pavani Majety <pmajety@nvidia.com>
This commit is contained in:
Lain
2026-02-02 11:47:46 -08:00
committed by GitHub
parent 0130223bd9
commit 089cd4f002
7 changed files with 129 additions and 82 deletions

View File

@@ -141,8 +141,8 @@ struct cutlass_3x_gemm_sm100 {
sizeof(typename CollectiveEpilogue::SharedStorage))>,
KernelSchedule>::CollectiveOp;
-using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
+using GemmKernel = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
+Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
};
template <typename ElementAB_, typename ElementD_,
@@ -202,8 +202,8 @@ struct cutlass_3x_gemm_sm120 {
sizeof(typename CollectiveEpilogue::SharedStorage))>,
KernelSchedule>::CollectiveOp;
-using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
+using GemmKernel = enable_sm120_only<cutlass::gemm::kernel::GemmUniversal<
+Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
};
} // namespace vllm

View File

@@ -123,7 +123,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
MainloopScheduler
>::CollectiveOp>;
-using KernelType = enable_sm100_only<cutlass::gemm::kernel::GemmUniversal<
+using KernelType = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue>>;
struct GemmKernel : public KernelType {};

View File

@@ -90,8 +90,8 @@ struct cutlass_3x_gemm_sm100_fp8 {
// -----------------------------------------------------------
// Kernel definition
// -----------------------------------------------------------
-using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
+using GemmKernel = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
+Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
};
template <typename InType, typename OutType, bool EnableBias>