[Tests] Skip model weight download for render-only test server (#36813)

Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
This commit is contained in:
Sage
2026-03-12 08:24:42 +02:00
committed by GitHub
parent 894843eb25
commit 802f306cd1

View File

@@ -144,6 +144,17 @@ class RemoteVLLMServer:
"""Subclasses override this method to customize server process launch"""
raise NotImplementedError
def _pre_download_model(self, model: str, args) -> None:
"""Download model weights before starting the server to avoid timeout."""
is_local = os.path.isdir(model)
if not is_local:
engine_args = AsyncEngineArgs.from_cli_args(args)
model_config = engine_args.create_model_config()
load_config = engine_args.create_load_config()
model_loader = get_model_loader(load_config)
model_loader.download_model(model_config)
def __init__(
self,
model: str,
@@ -195,15 +206,7 @@ class RemoteVLLMServer:
getattr(args, "show_hidden_metrics_for_version", None) is not None
)
# download the model before starting the server to avoid timeout
is_local = os.path.isdir(model)
if not is_local:
engine_args = AsyncEngineArgs.from_cli_args(args)
model_config = engine_args.create_model_config()
load_config = engine_args.create_load_config()
model_loader = get_model_loader(load_config)
model_loader.download_model(model_config)
self._pre_download_model(model, args)
# Record GPU memory before server start so we know what
# "released" looks like.
@@ -515,6 +518,19 @@ class RemoteLaunchRenderServer(RemoteVLLMServer):
start_new_session=True,
)
def _pre_download_model(self, model: str, args) -> None:
"""Download only the tokenizer files (no model weights needed)."""
is_local = os.path.isdir(model)
if not is_local:
engine_args = AsyncEngineArgs.from_cli_args(args)
model_config = engine_args.create_model_config()
get_tokenizer(
model_config.tokenizer,
tokenizer_mode=model_config.tokenizer_mode,
trust_remote_code=model_config.trust_remote_code,
revision=model_config.tokenizer_revision,
)
def _wait_for_gpu_memory_release(self, timeout: float = 30.0):
pass # No GPU used