[Bugfix] Fix weights offloading for sleep mode (#32947)

Signed-off-by: Jarno Seppänen <jseppanen@nvidia.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
J Seppänen
2026-02-10 22:38:17 +02:00
committed by GitHub
parent fdd6f2ad58
commit 506ad7d7c1

View File

@@ -286,9 +286,10 @@ class Worker(WorkerBase):
# to hijack tensor allocation.
def load_model(self) -> None:
    """Load the model through the model runner.

    Reads the ``VLLM_ELASTIC_EP_SCALE_UP_LAUNCH`` environment variable and
    forwards ``eep_scale_up=True`` to ``self.model_runner.load_model`` only
    when it is exactly ``"1"``. The load runs with two context managers
    active at the same time: the one returned by
    ``self._maybe_get_memory_pool_context(tag="weights")`` and
    ``set_current_vllm_config(self.vllm_config)``.
    """
    eep_scale_up = os.environ.get("VLLM_ELASTIC_EP_SCALE_UP_LAUNCH") == "1"
    # The two managers must be separate `with` items. Writing
    # `with a and b:` evaluates the boolean expression first and enters only
    # its result (normally `b`), so `a.__enter__()` would never run and the
    # "weights" context would be silently skipped.
    # NOTE(review): presumably that skipped context is what broke weight
    # offloading in sleep mode -- confirm against the allocator code.
    with (
        self._maybe_get_memory_pool_context(tag="weights"),
        set_current_vllm_config(self.vllm_config),
    ):
        self.model_runner.load_model(eep_scale_up=eep_scale_up)
def update_config(self, overrides: dict[str, Any]) -> None: