From d15c3b90fc70ba8d787ee2b172caf5b978909fe9 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 3 Mar 2026 15:31:59 -0800 Subject: [PATCH] [Core] Move save_tensorized_model logic to Worker (#35825) Signed-off-by: Nick Hill --- vllm/v1/worker/gpu_model_runner.py | 13 +------------ vllm/v1/worker/gpu_worker.py | 10 +++++----- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 8c92aab26..e4ddefc81 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -58,7 +58,7 @@ from vllm.model_executor.layers.rotary_embedding import ( MRotaryEmbedding, XDRotaryEmbedding, ) -from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader +from vllm.model_executor.model_loader import get_model_loader from vllm.model_executor.model_loader.reload import ( finalize_layerwise_reload, initialize_layerwise_reload, @@ -194,7 +194,6 @@ from .utils import ( ) if TYPE_CHECKING: - from vllm.model_executor.model_loader.tensorizer import TensorizerConfig from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput from vllm.v1.spec_decode.ngram_proposer import NgramProposer @@ -4510,16 +4509,6 @@ class GPUModelRunner( weights_not_loaded, ) - def save_tensorized_model( - self, - tensorizer_config: "TensorizerConfig", - ) -> None: - TensorizerLoader.save_model( - self.get_model(), - tensorizer_config=tensorizer_config, - model_config=self.model_config, - ) - def _get_prompt_logprobs_dict( self, hidden_states: torch.Tensor, diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 62f0433ef..c0654abd5 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -57,6 +57,7 @@ from vllm.v1.worker.utils import is_residual_scattered_for_sp from vllm.v1.worker.worker_base import WorkerBase from vllm.v1.worker.workspace import init_workspace_manager +from vllm.model_executor.model_loader import TensorizerLoader
from .gpu.warmup import warmup_kernels from .utils import request_memory @@ -836,12 +837,11 @@ class Worker(WorkerBase): max_size=max_size, ) - def save_tensorized_model( - self, - tensorizer_config: "TensorizerConfig", - ) -> None: - self.model_runner.save_tensorized_model( + def save_tensorized_model(self, tensorizer_config: "TensorizerConfig") -> None: + TensorizerLoader.save_model( + self.get_model(), tensorizer_config=tensorizer_config, + model_config=self.model_config, ) def init_weight_transfer_engine(self, init_info: dict) -> None: