fix: guard CUTLASS includes with __CUDA_ARCH__ for host compilation

This commit is contained in:
2026-05-28 05:09:07 +00:00
parent 41e59a2423
commit 0c73a024ba
2 changed files with 4 additions and 3 deletions

View File

@@ -5,7 +5,6 @@
#include "fmha_sm100.cuh"
#include <ATen/ATen.h>
#include <torch/extension.h>
#include <cuda_runtime.h>
namespace dsv4 {
namespace kernels {

View File

@@ -29,16 +29,18 @@
#pragma once
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp8.h>
#include <cuda_bf16.h>
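// CUTLASS/CuTe headers: included only when __CUDA_ARCH__ is defined, i.e. during
// nvcc's device compilation pass. Host compilers and nvcc's host pass skip them.
// NOTE(review): any later use of cutlass::/cute:: names outside this same guard
// would be undeclared in the host pass; if host-side code needs these headers
// under nvcc, __CUDACC__ may be the intended guard — confirm.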
#if defined(__CUDA_ARCH__)
#include <cutlass/cutlass.h>
#include <cute/arch/mma_sm100_umma.hpp>
#include <cute/arch/copy_sm100.hpp>
#include <cute/arch/copy_sm100_tma.hpp>
#include <cute/arch/tmem_allocator_sm100.hpp>
#include <cute/arch/cluster_sm100.hpp>
#endif
namespace dsv4 {
namespace kernels {