[kv_offload+HMA][2/N]: Support multiple KV groups in GPULoadStoreSpec (#36642)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
This commit is contained in:
Or Ozeri
2026-03-18 19:26:40 +02:00
committed by GitHub
parent 39bfb57b7c
commit 5dd8df0701
3 changed files with 43 additions and 10 deletions

View File

@@ -135,19 +135,19 @@ def test_transfer(
# set transfer direction
if gpu_to_cpu:
handler = handlers.gpu_to_cpu_handler
src_spec_class = GPULoadStoreSpec
dst_spec_class = CPULoadStoreSpec
src_blocks = gpu_blocks
dst_blocks = cpu_blocks
src_spec = GPULoadStoreSpec(src_blocks, group_sizes=(len(src_blocks),))
dst_spec = CPULoadStoreSpec(dst_blocks)
src_blocks_in_kernel_block_size = gpu_blocks_in_kernel_block_size
dst_blocks_in_kernel_block_size = cpu_blocks_in_kernel_block_size
dst_size_in_kernel_blocks = num_cpu_blocks * kernel_blocks_per_cpu_block
else:
handler = handlers.cpu_to_gpu_handler
src_spec_class = CPULoadStoreSpec
dst_spec_class = GPULoadStoreSpec
src_blocks = cpu_blocks
dst_blocks = gpu_blocks
src_spec = CPULoadStoreSpec(src_blocks)
dst_spec = GPULoadStoreSpec(dst_blocks, group_sizes=(len(dst_blocks),))
src_blocks_in_kernel_block_size = cpu_blocks_in_kernel_block_size
dst_blocks_in_kernel_block_size = gpu_blocks_in_kernel_block_size
dst_size_in_kernel_blocks = num_gpu_blocks * kernel_blocks_per_gpu_block
@@ -159,10 +159,6 @@ def test_transfer(
):
dst_to_src[dst_block] = src_block
# build transfer specs
src_spec = src_spec_class(src_blocks)
dst_spec = dst_spec_class(dst_blocks)
# clone src and dst tensors before transfer
orig_src_caches = [x.clone() for x in handler.src_tensors]
orig_dst_caches = [x.clone() for x in handler.dst_tensors]