Fix includes: use cutlass/float_subbyte.h (provides float_e2m1_t and float_ue4m3_t), and point the default CUTLASS include path at the latest CUTLASS checkout
This commit is contained in:
@@ -43,7 +43,7 @@
|
||||
#include "cutlass/kernel_hardware_info.hpp"
|
||||
#include "cutlass/detail/sm100_blockscaled_layout.hpp"
|
||||
|
||||
#include "cute/numeric/float8.hpp"
|
||||
#include "cutlass/float_subbyte.h"
|
||||
#include "cute/layout.hpp"
|
||||
|
||||
using namespace cute;
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "cutlass/kernel_hardware_info.hpp"
|
||||
#include "cutlass/detail/sm100_blockscaled_layout.hpp"
|
||||
|
||||
#include "cute/numeric/float8.hpp"
|
||||
#include "cutlass/float_subbyte.h"
|
||||
#include "cute/layout.hpp"
|
||||
|
||||
using namespace cute;
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
"""
|
||||
Setup script for CUTLASS NVFP4 block-scaled GEMM PyTorch extension.
|
||||
"""
|
||||
"""Setup script for CUTLASS NVFP4 block-scaled GEMM PyTorch extension."""
|
||||
|
||||
import os
|
||||
from setuptools import setup
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
|
||||
# CUTLASS include directory
|
||||
# CUTLASS include directory — prefer the latest from GitHub
|
||||
CUTLASS_INCLUDE_DIR = os.environ.get(
|
||||
"CUTLASS_INCLUDE_DIR",
|
||||
"/usr/local/lib/python3.12/dist-packages/tilelang/3rdparty/cutlass/include"
|
||||
"/root/cutlass/include"
|
||||
)
|
||||
if not os.path.exists(os.path.join(CUTLASS_INCLUDE_DIR, "cutlass", "cutlass.h")):
|
||||
for alt in [
|
||||
"/root/cutlass/include",
|
||||
"/usr/local/lib/python3.12/dist-packages/tilelang/3rdparty/cutlass/include",
|
||||
"/usr/local/include/cutlass",
|
||||
"/opt/cutlass/include",
|
||||
|
||||
Reference in New Issue
Block a user