Allocate more shared memory to attention kernel (#1154)

2023-09-26 22:27:13 -07:00
parent 03ffd0a022
commit cf5cb1e33e
7 changed files with 87 additions and 3 deletions
--- a/setup.py
+++ b/setup.py
@@ -195,6 +195,17 @@ quantization_extension = CUDAExtension(
 )
 ext_modules.append(quantization_extension)

+# Misc. CUDA utils.
+cuda_utils_extension = CUDAExtension(
+    name="vllm.cuda_utils",
+    sources=["csrc/cuda_utils.cpp", "csrc/cuda_utils_kernels.cu"],
+    extra_compile_args={
+        "cxx": CXX_FLAGS,
+        "nvcc": NVCC_FLAGS,
+    },
+)
+ext_modules.append(cuda_utils_extension)
+

 def get_path(*filepath) -> str:
    return os.path.join(ROOT_DIR, *filepath)