[Doc][CI/Build] Update docs and tests to use vllm serve (#6431)

This commit is contained in:
Cyrus Leung
2024-07-17 15:43:21 +08:00
committed by GitHub
parent a19e8d3726
commit 5bf35a91e4
23 changed files with 155 additions and 175 deletions

View File

@@ -15,8 +15,6 @@ from ..utils import RemoteOpenAIServer
])
def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):
pp_args = [
-"--model",
-MODEL_NAME,
# use half precision for speed and memory savings in CI environment
"--dtype",
"bfloat16",
@@ -34,8 +32,6 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):
# schedule all workers in a node other than the head node,
# which can cause the test to fail.
tp_args = [
-"--model",
-MODEL_NAME,
# use half precision for speed and memory savings in CI environment
"--dtype",
"bfloat16",
@@ -53,7 +49,7 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):
results = []
for args in [pp_args, tp_args]:
-with RemoteOpenAIServer(args) as server:
+with RemoteOpenAIServer(MODEL_NAME, args) as server:
client = server.get_client()
# test models list