From 2f41d6c0631c75799d40199b1787831f999d2836 Mon Sep 17 00:00:00 2001 From: Ajay Anubolu <124525760+AjAnubolu@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:42:16 -0700 Subject: [PATCH] [Bugfix] Fix cpu-offload-gb assertion with non-default block sizes (#36461) Signed-off-by: AjAnubolu Signed-off-by: Michael Goin Co-authored-by: Michael Goin --- vllm/v1/worker/gpu_model_runner.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index a9a4497a3..0b85a67f8 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -6517,11 +6517,6 @@ class GPUModelRunner( block_sizes != self._init_block_sizes or kernel_block_sizes != self._init_kernel_block_sizes ): - assert self.offload_config.uva.cpu_offload_gb == 0, ( - "Cannot re-initialize the input batch when CPU weight " - "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501 - "for more details." - ) self._init_block_sizes = block_sizes self._init_kernel_block_sizes = kernel_block_sizes self.input_batch = InputBatch(