[CI/Build] Replace vllm.entrypoints.openai.api_server entrypoint with vllm serve command (#25967)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-10-03 01:04:57 +08:00
parent 3b279a84be
commit d00d652998
22 changed files with 101 additions and 66 deletions
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@@ -786,13 +786,43 @@ def test_model_specification(parser_with_config, cli_config_file,
        parser_with_config.parse_args(['serve', '--config', cli_config_file])

    # Test using --model option raises error
-    with pytest.raises(
-            ValueError,
-            match=
-        ("With `vllm serve`, you should provide the model as a positional "
-         "argument or in a config file instead of via the `--model` option."),
-    ):
-        parser_with_config.parse_args(['serve', '--model', 'my-model'])
+    # with pytest.raises(
+    #         ValueError,
+    #         match=
+    #     ("With `vllm serve`, you should provide the model as a positional "
+    #      "argument or in a config file instead of via the `--model` option."),
+    # ):
+    #     parser_with_config.parse_args(['serve', '--model', 'my-model'])
+
+    # Test backward compatibility of the `--model` option.
+    # (When backward compatibility is dropped, the test above should be
+    # uncommented and the test below should be removed.)
+    args = parser_with_config.parse_args([
+        'serve',
+        '--tensor-parallel-size',
+        '2',
+        '--model',
+        'my-model',
+        '--trust-remote-code',
+        '--port',
+        '8001',
+    ])
+    assert args.model is None
+    assert args.tensor_parallel_size == 2
+    assert args.trust_remote_code is True
+    assert args.port == 8001
+
+    args = parser_with_config.parse_args([
+        'serve',
+        '--tensor-parallel-size=2',
+        '--model=my-model',
+        '--trust-remote-code',
+        '--port=8001',
+    ])
+    assert args.model is None
+    assert args.tensor_parallel_size == 2
+    assert args.trust_remote_code is True
+    assert args.port == 8001

    # Test other config values are preserved
    args = parser_with_config.parse_args([