OffloadingConnector: Support kernel_block_size != block_size (#30692)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
2026-01-22 14:30:04 +02:00
parent 841d53aaa8
commit 421012b63a
7 changed files with 158 additions and 116 deletions
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@@ -692,7 +692,8 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) {
  // Cache ops
  // Swap in (out) the cache blocks from src to dst.
  cache_ops.def(
-      "swap_blocks(Tensor src, Tensor! dst, Tensor block_mapping) -> ()");
+      "swap_blocks(Tensor src, Tensor! dst,"
+      "            int block_size_in_bytes, Tensor block_mapping) -> ()");
  cache_ops.impl("swap_blocks", torch::kCUDA, &swap_blocks);

  // Reshape the key and value tensors and cache them.