[CI/Build] Simplify OpenAI server setup in tests (#5100)

This commit is contained in:
Cyrus Leung
2024-06-14 02:21:53 +08:00
committed by GitHub
parent 03dccc886e
commit 39873476f8
6 changed files with 284 additions and 237 deletions

View File

@@ -22,11 +22,12 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
tensorize_vllm_model)
from ..conftest import VllmRunner, cleanup
-from ..utils import ServerRunner
+from ..utils import RemoteOpenAIServer
# yapf conflicts with isort for this docstring
prompts = [
"Hello, my name is",
"The president of the United States is",
@@ -216,18 +217,13 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
openai_args = [
"--model", model_ref, "--dtype", "float16", "--load-format",
"tensorizer", "--model-loader-extra-config",
-        json.dumps(model_loader_extra_config), "--port", "8000"
+        json.dumps(model_loader_extra_config),
]
-    server = ServerRunner.remote(openai_args)
-    assert ray.get(server.ready.remote())
+    server = RemoteOpenAIServer(openai_args)
print("Server ready.")
-    client = openai.OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
+    client = server.get_client()
completion = client.completions.create(model=model_ref,
prompt="Hello, my name is",
max_tokens=5,