fix: single PYBIND11_MODULE for combined gather .so

Both gather_kv.cu and gather_swa.cu are compiled into one .so.
Only gather_kv.cu defines the PYBIND11_MODULE; gather_swa.cu
just provides the function implementations.
This commit is contained in:
2026-05-30 21:13:24 +00:00
parent 8fb9d89658
commit 016d722abc
2 changed files with 6 additions and 4 deletions

View File

@@ -103,4 +103,8 @@ void gather_kv_cuda(
// Python bindings for the combined gather extension.
// Per the commit description, this is the only PYBIND11_MODULE in the shared
// object; it registers the entry points implemented in gather_kv.cu and
// gather_swa.cu under the names "gather_kv", "gather_swa" and
// "gather_all_compressed".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("gather_kv", &gather_kv_cuda, "Gather KV entries into dense tile");
// gather_swa + gather_all_compressed are defined in gather_swa.cu
// (compiled together into the same .so)
// NOTE(review): taking the address of gather_swa_cuda and
// gather_all_compressed_cuda requires both to be declared earlier in this
// translation unit; no declaration is visible in this hunk -- confirm one exists.
m.def("gather_swa", &gather_swa_cuda, "Gather SWA window into dense BF16 tile");
m.def("gather_all_compressed", &gather_all_compressed_cuda, "Gather all compressed KV for HCA");
}

View File

@@ -171,7 +171,5 @@ void gather_all_compressed_cuda(
}
// NOTE(review): this view shows the diff without +/- markers. Judging by the
// hunk header (7 lines before, 5 after) and the comment that follows, this
// module definition appears to be the one removed from gather_swa.cu -- confirm
// against the file itself before relying on it.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("gather_swa", &gather_swa_cuda, "Gather SWA window into dense BF16 tile");
m.def("gather_all_compressed", &gather_all_compressed_cuda, "Gather all compressed KV for HCA");
}
// gather_swa_cuda and gather_all_compressed_cuda are registered in gather_kv.cu's
// PYBIND11_MODULE (both files are compiled together into a single cache_gather.so,
// so only one of them may define the module).