[ci] try to add multi-node tests (#6280)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
Co-authored-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
This commit is contained in:
youkaichao
2024-07-12 21:51:48 -07:00
committed by GitHub
parent d80aef3776
commit 41708e5034
13 changed files with 229 additions and 274 deletions

View File

@@ -6,7 +6,6 @@ from unittest.mock import MagicMock, patch
import openai
import pytest
-import ray
import torch
from tensorizer import EncryptionParams
@@ -22,7 +21,7 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
tensorize_vllm_model)
from ..conftest import VllmRunner, cleanup
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
# yapf conflicts with isort for this docstring
@@ -220,23 +219,21 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
json.dumps(model_loader_extra_config),
]
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    with RemoteOpenAIServer(openai_args) as server:
+        print("Server ready.")
-    server = RemoteOpenAIServer(openai_args)
-    print("Server ready.")
-    client = server.get_client()
-    completion = client.completions.create(model=model_ref,
-                                           prompt="Hello, my name is",
-                                           max_tokens=5,
-                                           temperature=0.0)
+        client = server.get_client()
+        completion = client.completions.create(model=model_ref,
+                                               prompt="Hello, my name is",
+                                               max_tokens=5,
+                                               temperature=0.0)
-    assert completion.id is not None
-    assert len(completion.choices) == 1
-    assert len(completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
-    assert completion.usage == openai.types.CompletionUsage(
-        completion_tokens=5, prompt_tokens=6, total_tokens=11)
+        assert completion.id is not None
+        assert len(completion.choices) == 1
+        assert len(completion.choices[0].text) >= 5
+        assert completion.choices[0].finish_reason == "length"
+        assert completion.usage == openai.types.CompletionUsage(
+            completion_tokens=5, prompt_tokens=6, total_tokens=11)
def test_raise_value_error_on_invalid_load_format(vllm_runner):
@@ -282,7 +279,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
base_model.model.llm_engine.model_executor.shutdown()
del base_model
cleanup()
-ray.shutdown()
# load model with two shards and serialize with encryption
model_path = str(tmp_path / (model_ref + "-%02d.tensors"))
@@ -305,7 +301,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
assert os.path.isfile(model_path % 0), "Serialization subprocess failed"
assert os.path.isfile(model_path % 1), "Serialization subprocess failed"
cleanup()
-ray.shutdown()
loaded_vllm_model = vllm_runner(
model_ref,