[Doc][CI/Build] Update docs and tests to use vllm serve (#6431)

This commit is contained in:
Cyrus Leung
2024-07-17 15:43:21 +08:00
committed by GitHub
parent a19e8d3726
commit 5bf35a91e4
23 changed files with 155 additions and 175 deletions

View File

@@ -9,17 +9,17 @@ MODEL_NAME = "facebook/opt-125m"
 @pytest.fixture(scope="module")
 def server():
-    with RemoteOpenAIServer([
-        "--model",
-        MODEL_NAME,
-        # use half precision for speed and memory savings in CI environment
-        "--dtype",
-        "float16",
-        "--max-model-len",
-        "2048",
-        "--enforce-eager",
-        "--engine-use-ray"
-    ]) as remote_server:
+    args = [
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "float16",
+        "--max-model-len",
+        "2048",
+        "--enforce-eager",
+        "--engine-use-ray"
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server