[Tests] Skip model weight download for render-only test server (#36813)
Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
This commit is contained in:
@@ -144,6 +144,17 @@ class RemoteVLLMServer:
|
||||
"""Subclasses override this method to customize server process launch"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _pre_download_model(self, model: str, args) -> None:
|
||||
"""Download model weights before starting the server to avoid timeout."""
|
||||
is_local = os.path.isdir(model)
|
||||
if not is_local:
|
||||
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||
model_config = engine_args.create_model_config()
|
||||
load_config = engine_args.create_load_config()
|
||||
|
||||
model_loader = get_model_loader(load_config)
|
||||
model_loader.download_model(model_config)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
@@ -195,15 +206,7 @@ class RemoteVLLMServer:
|
||||
getattr(args, "show_hidden_metrics_for_version", None) is not None
|
||||
)
|
||||
|
||||
# download the model before starting the server to avoid timeout
|
||||
is_local = os.path.isdir(model)
|
||||
if not is_local:
|
||||
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||
model_config = engine_args.create_model_config()
|
||||
load_config = engine_args.create_load_config()
|
||||
|
||||
model_loader = get_model_loader(load_config)
|
||||
model_loader.download_model(model_config)
|
||||
self._pre_download_model(model, args)
|
||||
|
||||
# Record GPU memory before server start so we know what
|
||||
# "released" looks like.
|
||||
@@ -515,6 +518,19 @@ class RemoteLaunchRenderServer(RemoteVLLMServer):
|
||||
start_new_session=True,
|
||||
)
|
||||
|
||||
def _pre_download_model(self, model: str, args) -> None:
|
||||
"""Download only the tokenizer files (no model weights needed)."""
|
||||
is_local = os.path.isdir(model)
|
||||
if not is_local:
|
||||
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||
model_config = engine_args.create_model_config()
|
||||
get_tokenizer(
|
||||
model_config.tokenizer,
|
||||
tokenizer_mode=model_config.tokenizer_mode,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
revision=model_config.tokenizer_revision,
|
||||
)
|
||||
|
||||
def _wait_for_gpu_memory_release(self, timeout: float = 30.0):
|
||||
pass # No GPU used
|
||||
|
||||
|
||||
Reference in New Issue
Block a user