diff --git a/vllm/config/model.py b/vllm/config/model.py index 8e28e34bf..3c89658f0 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -106,6 +106,10 @@ class ModelConfig: """Name or path of the Hugging Face model to use. It is also used as the content for `model_name` tag in metrics output when `served_model_name` is not specified.""" + model_weights: str = "" + """Original model weights path. Used when the model is pulled from object + storage (e.g., RunAI) to preserve the original URI while `model` points to + the local directory.""" runner: RunnerOption = "auto" """The type of model runner to use. Each vLLM instance only supports one model runner, even if the same model can be used for multiple types.""" @@ -705,6 +709,10 @@ class ModelConfig: tokenizer: Tokenizer name or path """ + # Skip if model_weights is already set (model already pulled) + if self.model_weights: + return + if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)): return diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 77201f668..a6a4f780a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -354,6 +354,7 @@ class EngineArgs: """Arguments for vLLM engine.""" model: str = ModelConfig.model + model_weights: str = ModelConfig.model_weights served_model_name: str | list[str] | None = ModelConfig.served_model_name tokenizer: str | None = ModelConfig.tokenizer hf_config_path: str | None = ModelConfig.hf_config_path @@ -1206,6 +1207,7 @@ class EngineArgs: return ModelConfig( model=self.model, + model_weights=self.model_weights, hf_config_path=self.hf_config_path, runner=self.runner, convert=self.convert, @@ -1349,6 +1351,7 @@ class EngineArgs: model_config = self.create_model_config() self.model = model_config.model + self.model_weights = model_config.model_weights self.tokenizer = model_config.tokenizer self._check_feature_supported(model_config) diff --git a/vllm/model_executor/model_loader/runai_streamer_loader.py b/vllm/model_executor/model_loader/runai_streamer_loader.py index fb33d3c64..9d3ade4cd 100644 --- a/vllm/model_executor/model_loader/runai_streamer_loader.py +++ b/vllm/model_executor/model_loader/runai_streamer_loader.py @@ -108,8 +108,8 @@ class RunaiModelStreamerLoader(BaseModelLoader): def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None: """Load weights into a model.""" model_weights = model_config.model - if hasattr(model_config, "model_weights"): - model_weights = model_config.model_weights + if model_weights_override := model_config.model_weights: + model_weights = model_weights_override model.load_weights( self._get_weights_iterator(model_weights, model_config.revision) ) diff --git a/vllm/model_executor/model_loader/sharded_state_loader.py b/vllm/model_executor/model_loader/sharded_state_loader.py index 1538f0c2a..e27cedd99 100644 --- a/vllm/model_executor/model_loader/sharded_state_loader.py +++ b/vllm/model_executor/model_loader/sharded_state_loader.py @@ -110,8 +110,8 @@ class ShardedStateLoader(BaseModelLoader): from vllm.distributed import get_tensor_model_parallel_rank model_weights = model_config.model - if hasattr(model_config, "model_weights"): - model_weights = model_config.model_weights + if model_weights_override := model_config.model_weights: + model_weights = model_weights_override local_model_path = model_weights rank = get_tensor_model_parallel_rank()