Merge pull request #135 from danthe3rd/patch-3

Fix import-time error: "undefined symbol: _ZN3c104cuda20getCurrentCUDAStreamEa"
This commit is contained in:
dan_the_3rd
2025-07-29 10:14:15 +02:00
committed by GitHub
parent dd6ed14acb
commit fb7c687548

View File

@@ -4,7 +4,7 @@ import shutil
import subprocess
from setuptools import find_packages
from setuptools.command.build_py import build_py
-from torch.utils.cpp_extension import CppExtension, CUDA_HOME
+from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
current_dir = os.path.dirname(os.path.realpath(__file__))
cxx_flags = ['-std=c++20', '-O3', '-fPIC', '-Wno-psabi']
@@ -89,7 +89,7 @@ if __name__ == '__main__':
]
},
ext_modules=[
-CppExtension(name='deep_gemm_cpp',
+CUDAExtension(name='deep_gemm_cpp',
sources=sources,
include_dirs=build_include_dirs,
libraries=build_libraries,