[Bugfix] Fix cpu-offload-gb assertion with non-default block sizes (#36461)

Signed-off-by: AjAnubolu <anuboluajay@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Ajay Anubolu
2026-04-08 19:42:16 -07:00
committed by GitHub
parent 3aecdf08b4
commit 2f41d6c063

View File

@@ -6517,11 +6517,6 @@ class GPUModelRunner(
block_sizes != self._init_block_sizes
or kernel_block_sizes != self._init_kernel_block_sizes
):
assert self.offload_config.uva.cpu_offload_gb == 0, (
"Cannot re-initialize the input batch when CPU weight "
"offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501
"for more details."
)
self._init_block_sizes = block_sizes
self._init_kernel_block_sizes = kernel_block_sizes
self.input_batch = InputBatch(