[Doc][CI/Build] Update docs and tests to use vllm serve (#6431)

This commit is contained in:
Cyrus Leung
2024-07-17 15:43:21 +08:00
committed by GitHub
parent a19e8d3726
commit 5bf35a91e4
23 changed files with 155 additions and 175 deletions

View File

@@ -11,17 +11,17 @@ EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
@pytest.fixture(scope="module")
def embedding_server():
with RemoteOpenAIServer([
"--model",
EMBEDDING_MODEL_NAME,
# use half precision for speed and memory savings in CI environment
"--dtype",
"bfloat16",
"--enforce-eager",
"--max-model-len",
"8192",
"--enforce-eager",
]) as remote_server:
args = [
# use half precision for speed and memory savings in CI environment
"--dtype",
"bfloat16",
"--enforce-eager",
"--max-model-len",
"8192",
"--enforce-eager",
]
with RemoteOpenAIServer(EMBEDDING_MODEL_NAME, args) as remote_server:
yield remote_server