[Bugfix] Fix cpu-offload-gb assertion with non-default block sizes (#36461)
Signed-off-by: AjAnubolu <anuboluajay@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -6517,11 +6517,6 @@ class GPUModelRunner(
|
|||||||
block_sizes != self._init_block_sizes
|
block_sizes != self._init_block_sizes
|
||||||
or kernel_block_sizes != self._init_kernel_block_sizes
|
or kernel_block_sizes != self._init_kernel_block_sizes
|
||||||
):
|
):
|
||||||
assert self.offload_config.uva.cpu_offload_gb == 0, (
|
|
||||||
"Cannot re-initialize the input batch when CPU weight "
|
|
||||||
"offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501
|
|
||||||
"for more details."
|
|
||||||
)
|
|
||||||
self._init_block_sizes = block_sizes
|
self._init_block_sizes = block_sizes
|
||||||
self._init_kernel_block_sizes = kernel_block_sizes
|
self._init_kernel_block_sizes = kernel_block_sizes
|
||||||
self.input_batch = InputBatch(
|
self.input_batch = InputBatch(
|
||||||
|
|||||||
Reference in New Issue
Block a user