[Misc] Deprecation Warning when setting --engine-use-ray (#7424)

Signed-off-by: Wallas Santos <wallashss@ibm.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
Wallas Henrique
2024-08-14 13:44:27 -03:00
committed by GitHub
parent 67d115db08
commit 70b746efcf
7 changed files with 56 additions and 3 deletions

View File

@@ -1,3 +1,4 @@
import os
import subprocess
import sys
import time
@@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
"127.0.0.1", "--tokenizer-pool-size",
str(tokenizer_pool_size)
]
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
env_vars = os.environ.copy()
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
if engine_use_ray:
commands.append("--engine-use-ray")
if worker_use_ray:
commands.append("--worker-use-ray")
uvicorn_process = subprocess.Popen(commands)
uvicorn_process = subprocess.Popen(commands, env=env_vars)
yield
uvicorn_process.terminate()

View File

@@ -1,4 +1,5 @@
import asyncio
import os
from dataclasses import dataclass
import pytest
@@ -106,11 +107,16 @@ async def test_new_requests_event():
assert engine.engine.add_request_calls == 3
assert engine.engine.step_calls == old_step_calls + 1
# Allow deprecated engine_use_ray to not raise exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
assert engine.get_model_config() is not None
assert engine.get_tokenizer() is not None
assert engine.get_decoding_config() is not None
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
def test_asyncio_run():
wait_for_gpu_memory_to_clear(

View File

@@ -23,7 +23,11 @@ def server():
str(chatml_jinja_path),
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
# Allow `--engine-use-ray`; otherwise launching the server throws
# an error because it tries to use a deprecated feature
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
with RemoteOpenAIServer(MODEL_NAME, args,
env_dict=env_dict) as remote_server:
yield remote_server