[BugFix] kv_offloading: Fix bug in loading of partial cpu blocks (#28951)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Or Ozeri
2025-11-20 12:55:10 +02:00
committed by GitHub
parent 06c20c9904
commit c0c2dd1e0b
2 changed files with 11 additions and 13 deletions

View File

@@ -135,22 +135,20 @@ class CpuGpuOffloadingHandler(OffloadingHandler):
assert src_blocks.ndim == 1
assert dst_blocks.ndim == 1
dst_sub_blocks_to_skip = -src_blocks.size % dst_block_size_factor
src_sub_block_count = src_blocks.size * src_block_size_factor
dst_sub_block_count = dst_blocks.size * dst_block_size_factor
src_sub_blocks_to_skip = -dst_blocks.size % src_block_size_factor
assert (
src_sub_block_count
== dst_blocks.size * dst_block_size_factor - dst_sub_blocks_to_skip
)
assert dst_sub_block_count == src_sub_block_count - src_sub_blocks_to_skip
src_to_dst = np.empty((src_sub_block_count, 2), dtype=np.int64)
expand_block_ids(src_blocks, src_block_size_factor, src_to_dst[:, 0])
src_to_dst = np.empty((dst_sub_block_count, 2), dtype=np.int64)
expand_block_ids(
dst_blocks,
dst_block_size_factor,
src_to_dst[:, 1],
skip_count=dst_sub_blocks_to_skip,
src_blocks,
src_block_size_factor,
src_to_dst[:, 0],
skip_count=src_sub_blocks_to_skip,
)
expand_block_ids(dst_blocks, dst_block_size_factor, src_to_dst[:, 1])
src_to_dst_tensor = torch.from_numpy(src_to_dst)
event = self.events_pool.pop() if self.events_pool else torch.Event()