From 016d722abc0201cd0d44848433a00b7b368bcad9 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Sat, 30 May 2026 21:13:24 +0000
Subject: [PATCH] fix: single PYBIND11_MODULE for combined gather .so

Both gather_kv.cu and gather_swa.cu are compiled into one .so.
Only gather_kv.cu defines the PYBIND11_MODULE; gather_swa.cu just
provides the function implementations.
---
 dsv4/kernels/cuda/gather_kv.cu  | 4 ++++
 dsv4/kernels/cuda/gather_swa.cu | 6 ++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/dsv4/kernels/cuda/gather_kv.cu b/dsv4/kernels/cuda/gather_kv.cu
index 77692d91..7ea27878 100644
--- a/dsv4/kernels/cuda/gather_kv.cu
+++ b/dsv4/kernels/cuda/gather_kv.cu
@@ -103,4 +103,8 @@ void gather_kv_cuda(
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
     m.def("gather_kv", &gather_kv_cuda, "Gather KV entries into dense tile");
+    // gather_swa + gather_all_compressed are defined in gather_swa.cu
+    // (compiled together into the same .so)
+    m.def("gather_swa", &gather_swa_cuda, "Gather SWA window into dense BF16 tile");
+    m.def("gather_all_compressed", &gather_all_compressed_cuda, "Gather all compressed KV for HCA");
 }
diff --git a/dsv4/kernels/cuda/gather_swa.cu b/dsv4/kernels/cuda/gather_swa.cu
index d34a18a4..9123a0f3 100644
--- a/dsv4/kernels/cuda/gather_swa.cu
+++ b/dsv4/kernels/cuda/gather_swa.cu
@@ -171,7 +171,5 @@ void gather_all_compressed_cuda(
 }
 
 
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-    m.def("gather_swa", &gather_swa_cuda, "Gather SWA window into dense BF16 tile");
-    m.def("gather_all_compressed", &gather_all_compressed_cuda, "Gather all compressed KV for HCA");
-}
+// gather_swa functions are registered in gather_kv.cu's PYBIND11_MODULE
+// (both files are compiled together into a single cache_gather.so)