[Doc][CI/Build] Update docs and tests to use vllm serve (#6431)

2024-07-17 15:43:21 +08:00
parent a19e8d3726
commit 5bf35a91e4
23 changed files with 155 additions and 175 deletions
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -15,8 +15,6 @@ from ..utils import RemoteOpenAIServer
    ])
 def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):
    pp_args = [
-        "--model",
-        MODEL_NAME,
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "bfloat16",
@@ -34,8 +32,6 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):
    #  schedule all workers in a node other than the head node,
    #  which can cause the test to fail.
    tp_args = [
-        "--model",
-        MODEL_NAME,
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "bfloat16",
@@ -53,7 +49,7 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME):

    results = []
    for args in [pp_args, tp_args]:
-        with RemoteOpenAIServer(args) as server:
+        with RemoteOpenAIServer(MODEL_NAME, args) as server:
            client = server.get_client()

            # test models list