[V0 Deprecation] Deprecate --disable-frontend-multiprocessing (#37612)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -28,7 +28,7 @@ def server_args(request: pytest.FixtureRequest) -> list[str]:
|
||||
>>> @pytest.mark.parametrize(
|
||||
>>> "server_args",
|
||||
>>> [
|
||||
>>> ["--disable-frontend-multiprocessing"],
|
||||
>>> ["--max-model-len", "10100"],
|
||||
>>> [
|
||||
>>> "--model=NousResearch/Hermes-3-Llama-3.1-70B",
|
||||
>>> "--enable-auto-tool-choice",
|
||||
@@ -40,7 +40,7 @@ def server_args(request: pytest.FixtureRequest) -> list[str]:
|
||||
>>> ...
|
||||
|
||||
This will run `test_foo` twice with servers with:
|
||||
- `--disable-frontend-multiprocessing`
|
||||
- `--max-model-len 10100`
|
||||
- `--model=NousResearch/Hermes-3-Llama-3.1-70B --enable-auto-tool-choice`.
|
||||
|
||||
"""
|
||||
@@ -79,17 +79,6 @@ async def client(server):
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"server_args",
|
||||
[
|
||||
pytest.param([], id="default-frontend-multiprocessing"),
|
||||
pytest.param(
|
||||
["--disable-frontend-multiprocessing"],
|
||||
id="disable-frontend-multiprocessing",
|
||||
),
|
||||
],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_show_version(server: RemoteOpenAIServer):
|
||||
response = requests.get(server.url_for("version"))
|
||||
@@ -98,17 +87,6 @@ async def test_show_version(server: RemoteOpenAIServer):
|
||||
assert response.json() == {"version": VLLM_VERSION}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"server_args",
|
||||
[
|
||||
pytest.param([], id="default-frontend-multiprocessing"),
|
||||
pytest.param(
|
||||
["--disable-frontend-multiprocessing"],
|
||||
id="disable-frontend-multiprocessing",
|
||||
),
|
||||
],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_health(server: RemoteOpenAIServer):
|
||||
response = requests.get(server.url_for("health"))
|
||||
@@ -119,13 +97,7 @@ async def test_check_health(server: RemoteOpenAIServer):
|
||||
@pytest.mark.parametrize(
|
||||
"server_args",
|
||||
[
|
||||
pytest.param(
|
||||
["--max-model-len", "10100"], id="default-frontend-multiprocessing"
|
||||
),
|
||||
pytest.param(
|
||||
["--disable-frontend-multiprocessing", "--max-model-len", "10100"],
|
||||
id="disable-frontend-multiprocessing",
|
||||
),
|
||||
pytest.param(["--max-model-len", "10100"]),
|
||||
],
|
||||
indirect=True,
|
||||
)
|
||||
|
||||
@@ -50,7 +50,6 @@ def default_server_args():
|
||||
params=[
|
||||
"",
|
||||
"--enable-chunked-prefill",
|
||||
"--disable-frontend-multiprocessing",
|
||||
f"--show-hidden-metrics-for-version={PREV_MINOR_VERSION}",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -83,11 +83,8 @@ def example_prompt_embeds(hf_runner):
|
||||
return [_encode_embeds(item) for item in example_embeddings]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", params=["", "--disable-frontend-multiprocessing"])
|
||||
def server_with_prompt_embeds(default_server_args, request):
|
||||
if request.param:
|
||||
default_server_args.append(request.param)
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server_with_prompt_embeds(default_server_args):
|
||||
with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@@ -150,7 +150,6 @@ async def test_shutdown_on_engine_failure():
|
||||
"0.05",
|
||||
"--max-num-seqs",
|
||||
"2",
|
||||
"--disable-frontend-multiprocessing",
|
||||
],
|
||||
# ROCm: Disable stdout/stderr pipe capture. Subprocess hangs when
|
||||
# stdout/stderr pipes are enabled during ROCm GPU initialization.
|
||||
|
||||
Reference in New Issue
Block a user