[Doc][CI/Build] Update docs and tests to use vllm serve (#6431)

This commit is contained in:
Cyrus Leung
2024-07-17 15:43:21 +08:00
committed by GitHub
parent a19e8d3726
commit 5bf35a91e4
23 changed files with 155 additions and 175 deletions

View File

@@ -214,12 +214,12 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
## Start OpenAI API server
openai_args = [
"--model", model_ref, "--dtype", "float16", "--load-format",
"--dtype", "float16", "--load-format",
"tensorizer", "--model-loader-extra-config",
json.dumps(model_loader_extra_config),
]
with RemoteOpenAIServer(openai_args) as server:
with RemoteOpenAIServer(model_ref, openai_args) as server:
print("Server ready.")
client = server.get_client()