[ci] try to add multi-node tests (#6280)
Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
Co-authored-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
This commit is contained in:
@@ -6,7 +6,6 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import openai
|
||||
import pytest
|
||||
import ray
|
||||
import torch
|
||||
from tensorizer import EncryptionParams
|
||||
|
||||
@@ -22,7 +21,7 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
|
||||
tensorize_vllm_model)
|
||||
|
||||
from ..conftest import VllmRunner, cleanup
|
||||
from ..utils import VLLM_PATH, RemoteOpenAIServer
|
||||
from ..utils import RemoteOpenAIServer
|
||||
|
||||
# yapf conflicts with isort for this docstring
|
||||
|
||||
@@ -220,23 +219,21 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
|
||||
json.dumps(model_loader_extra_config),
|
||||
]
|
||||
|
||||
ray.init(runtime_env={"working_dir": VLLM_PATH})
|
||||
with RemoteOpenAIServer(openai_args) as server:
|
||||
print("Server ready.")
|
||||
|
||||
server = RemoteOpenAIServer(openai_args)
|
||||
print("Server ready.")
|
||||
client = server.get_client()
|
||||
completion = client.completions.create(model=model_ref,
|
||||
prompt="Hello, my name is",
|
||||
max_tokens=5,
|
||||
temperature=0.0)
|
||||
|
||||
client = server.get_client()
|
||||
completion = client.completions.create(model=model_ref,
|
||||
prompt="Hello, my name is",
|
||||
max_tokens=5,
|
||||
temperature=0.0)
|
||||
|
||||
assert completion.id is not None
|
||||
assert len(completion.choices) == 1
|
||||
assert len(completion.choices[0].text) >= 5
|
||||
assert completion.choices[0].finish_reason == "length"
|
||||
assert completion.usage == openai.types.CompletionUsage(
|
||||
completion_tokens=5, prompt_tokens=6, total_tokens=11)
|
||||
assert completion.id is not None
|
||||
assert len(completion.choices) == 1
|
||||
assert len(completion.choices[0].text) >= 5
|
||||
assert completion.choices[0].finish_reason == "length"
|
||||
assert completion.usage == openai.types.CompletionUsage(
|
||||
completion_tokens=5, prompt_tokens=6, total_tokens=11)
|
||||
|
||||
|
||||
def test_raise_value_error_on_invalid_load_format(vllm_runner):
|
||||
@@ -282,7 +279,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
|
||||
base_model.model.llm_engine.model_executor.shutdown()
|
||||
del base_model
|
||||
cleanup()
|
||||
ray.shutdown()
|
||||
|
||||
# load model with two shards and serialize with encryption
|
||||
model_path = str(tmp_path / (model_ref + "-%02d.tensors"))
|
||||
@@ -305,7 +301,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
|
||||
assert os.path.isfile(model_path % 0), "Serialization subprocess failed"
|
||||
assert os.path.isfile(model_path % 1), "Serialization subprocess failed"
|
||||
cleanup()
|
||||
ray.shutdown()
|
||||
|
||||
loaded_vllm_model = vllm_runner(
|
||||
model_ref,
|
||||
|
||||
Reference in New Issue
Block a user