[Bugfix] Fix cpu-offload-gb assertion with non-default block sizes (#36461)
Signed-off-by: AjAnubolu <anuboluajay@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -6517,11 +6517,6 @@ class GPUModelRunner(
|
||||
block_sizes != self._init_block_sizes
|
||||
or kernel_block_sizes != self._init_kernel_block_sizes
|
||||
):
|
||||
assert self.offload_config.uva.cpu_offload_gb == 0, (
|
||||
"Cannot re-initialize the input batch when CPU weight "
|
||||
"offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501
|
||||
"for more details."
|
||||
)
|
||||
self._init_block_sizes = block_sizes
|
||||
self._init_kernel_block_sizes = kernel_block_sizes
|
||||
self.input_batch = InputBatch(
|
||||
|
||||
Reference in New Issue
Block a user