RayLLM bugfix: preserve the object-store URL when engine_config is created multiple times (#30803)
Signed-off-by: Omer Dayan <omdayan@nvidia.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -106,6 +106,10 @@ class ModelConfig:
|
||||
"""Name or path of the Hugging Face model to use. It is also used as the
|
||||
content for `model_name` tag in metrics output when `served_model_name` is
|
||||
not specified."""
|
||||
model_weights: str = ""
|
||||
"""Original model weights path. Used when the model is pulled from object
|
||||
storage (e.g., RunAI) to preserve the original URI while `model` points to
|
||||
the local directory."""
|
||||
runner: RunnerOption = "auto"
|
||||
"""The type of model runner to use. Each vLLM instance only supports one
|
||||
model runner, even if the same model can be used for multiple types."""
|
||||
@@ -705,6 +709,10 @@ class ModelConfig:
|
||||
tokenizer: Tokenizer name or path
|
||||
"""
|
||||
|
||||
# Skip if model_weights is already set (model already pulled)
|
||||
if self.model_weights:
|
||||
return
|
||||
|
||||
if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)):
|
||||
return
|
||||
|
||||
|
||||
@@ -354,6 +354,7 @@ class EngineArgs:
|
||||
"""Arguments for vLLM engine."""
|
||||
|
||||
model: str = ModelConfig.model
|
||||
model_weights: str = ModelConfig.model_weights
|
||||
served_model_name: str | list[str] | None = ModelConfig.served_model_name
|
||||
tokenizer: str | None = ModelConfig.tokenizer
|
||||
hf_config_path: str | None = ModelConfig.hf_config_path
|
||||
@@ -1206,6 +1207,7 @@ class EngineArgs:
|
||||
|
||||
return ModelConfig(
|
||||
model=self.model,
|
||||
model_weights=self.model_weights,
|
||||
hf_config_path=self.hf_config_path,
|
||||
runner=self.runner,
|
||||
convert=self.convert,
|
||||
@@ -1349,6 +1351,7 @@ class EngineArgs:
|
||||
|
||||
model_config = self.create_model_config()
|
||||
self.model = model_config.model
|
||||
self.model_weights = model_config.model_weights
|
||||
self.tokenizer = model_config.tokenizer
|
||||
|
||||
self._check_feature_supported(model_config)
|
||||
|
||||
@@ -108,8 +108,8 @@ class RunaiModelStreamerLoader(BaseModelLoader):
|
||||
def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
    """Load weights into a model.

    The weights are read from ``model_config.model_weights`` when that
    field is non-empty (the original model weights path, e.g. an object
    storage URI preserved after the model was pulled to a local
    directory). Otherwise they are read from ``model_config.model``.

    Args:
        model: Module whose ``load_weights`` method receives the weights
            iterator.
        model_config: Supplies ``model``, ``model_weights`` and
            ``revision``.
    """
    # `model_weights` defaults to "" meaning "not set"; an empty value must
    # never replace the `model` fallback, so only a truthy override wins.
    model_weights = model_config.model_weights or model_config.model
    model.load_weights(
        self._get_weights_iterator(model_weights, model_config.revision)
    )
|
||||
|
||||
@@ -110,8 +110,8 @@ class ShardedStateLoader(BaseModelLoader):
|
||||
from vllm.distributed import get_tensor_model_parallel_rank
|
||||
|
||||
model_weights = model_config.model
|
||||
if hasattr(model_config, "model_weights"):
|
||||
model_weights = model_config.model_weights
|
||||
if model_weights_override := model_config.model_weights:
|
||||
model_weights = model_weights_override
|
||||
local_model_path = model_weights
|
||||
|
||||
rank = get_tensor_model_parallel_rank()
|
||||
|
||||
Reference in New Issue
Block a user