[Bugfix] Fix weights offloading for sleep mode (#32947)
Signed-off-by: Jarno Seppänen <jseppanen@nvidia.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
@@ -286,9 +286,10 @@ class Worker(WorkerBase):
|
||||
# to hijack tensor allocation.
|
||||
def load_model(self) -> None:
|
||||
eep_scale_up = os.environ.get("VLLM_ELASTIC_EP_SCALE_UP_LAUNCH") == "1"
|
||||
-        with self._maybe_get_memory_pool_context(
-            tag="weights"
-        ) and set_current_vllm_config(self.vllm_config):
+        with (
+            self._maybe_get_memory_pool_context(tag="weights"),
+            set_current_vllm_config(self.vllm_config),
+        ):
|
||||
self.model_runner.load_model(eep_scale_up=eep_scale_up)
|
||||
|
||||
def update_config(self, overrides: dict[str, Any]) -> None:
|
||||
|
||||
Reference in New Issue
Block a user