[Core] Interface for accessing model from VllmRunner (#10353)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -3,6 +3,7 @@ import json
|
||||
import os
|
||||
import pathlib
|
||||
import subprocess
|
||||
from functools import partial
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import openai
|
||||
@@ -24,7 +25,6 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
|
||||
# yapf: enable
|
||||
from vllm.utils import PlaceholderModule, import_from_path
|
||||
|
||||
from ..conftest import VllmRunner
|
||||
from ..utils import VLLM_PATH, RemoteOpenAIServer
|
||||
from .conftest import retry_until_skip
|
||||
|
||||
@@ -58,16 +58,6 @@ def is_curl_installed():
|
||||
return False
|
||||
|
||||
|
||||
def get_torch_model(vllm_runner: VllmRunner):
|
||||
return vllm_runner \
|
||||
.model \
|
||||
.llm_engine \
|
||||
.model_executor \
|
||||
.driver_worker \
|
||||
.model_runner \
|
||||
.model
|
||||
|
||||
|
||||
def write_keyfile(keyfile_path: str):
|
||||
encryption_params = EncryptionParams.random()
|
||||
pathlib.Path(keyfile_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -121,8 +111,10 @@ def test_deserialized_encrypted_vllm_model_has_same_outputs(
|
||||
|
||||
config_for_serializing = TensorizerConfig(tensorizer_uri=model_path,
|
||||
encryption_keyfile=key_path)
|
||||
serialize_vllm_model(get_torch_model(vllm_model),
|
||||
config_for_serializing)
|
||||
|
||||
vllm_model.apply_model(
|
||||
partial(serialize_vllm_model,
|
||||
tensorizer_config=config_for_serializing))
|
||||
|
||||
config_for_deserializing = TensorizerConfig(tensorizer_uri=model_path,
|
||||
encryption_keyfile=key_path)
|
||||
@@ -175,8 +167,10 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
|
||||
with vllm_runner(model_ref, ) as vllm_model:
|
||||
model_path = tmp_path / (model_ref + ".tensors")
|
||||
|
||||
serialize_vllm_model(get_torch_model(vllm_model),
|
||||
TensorizerConfig(tensorizer_uri=model_path))
|
||||
vllm_model.apply_model(
|
||||
partial(
|
||||
serialize_vllm_model,
|
||||
tensorizer_config=TensorizerConfig(tensorizer_uri=model_path)))
|
||||
|
||||
with vllm_runner(
|
||||
model_ref,
|
||||
@@ -215,8 +209,10 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
|
||||
with vllm_runner(model_ref, ) as vllm_model:
|
||||
model_path = tmp_path / (model_ref + ".tensors")
|
||||
|
||||
serialize_vllm_model(get_torch_model(vllm_model),
|
||||
TensorizerConfig(tensorizer_uri=model_path))
|
||||
vllm_model.apply_model(
|
||||
partial(
|
||||
serialize_vllm_model,
|
||||
tensorizer_config=TensorizerConfig(tensorizer_uri=model_path)))
|
||||
|
||||
model_loader_extra_config = {
|
||||
"tensorizer_uri": str(model_path),
|
||||
@@ -337,7 +333,9 @@ def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
|
||||
|
||||
with vllm_runner(model_ref) as vllm_model:
|
||||
outputs = vllm_model.generate(prompts, sampling_params)
|
||||
serialize_vllm_model(get_torch_model(vllm_model), config)
|
||||
|
||||
vllm_model.apply_model(
|
||||
partial(serialize_vllm_model, tensorizer_config=config))
|
||||
|
||||
assert is_vllm_tensorized(config)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user