fix: guard CUTLASS/CuTe includes with __CUDA_ARCH__ so they are skipped during host compilation
This commit is contained in:
@@ -5,7 +5,6 @@
|
||||
#include "fmha_sm100.cuh"
|
||||
#include <ATen/ATen.h>
|
||||
#include <torch/extension.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace dsv4 {
|
||||
namespace kernels {
|
||||
|
||||
@@ -29,16 +29,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp8.h>
|
||||
#include <cuda_bf16.h>
|
||||
|
||||
// CUTLASS / CuTe headers: included only in the device compilation pass (__CUDA_ARCH__ is not defined while host code is compiled)
|
||||
#if defined(__CUDA_ARCH__)
|
||||
#include <cutlass/cutlass.h>
|
||||
#include <cute/arch/mma_sm100_umma.hpp>
|
||||
#include <cute/arch/copy_sm100.hpp>
|
||||
#include <cute/arch/copy_sm100_tma.hpp>
|
||||
#include <cute/arch/tmem_allocator_sm100.hpp>
|
||||
#include <cute/arch/cluster_sm100.hpp>
|
||||
#endif
|
||||
|
||||
namespace dsv4 {
|
||||
namespace kernels {
|
||||
|
||||
Reference in New Issue
Block a user