[Misc] Deprecation Warning when setting --engine-use-ray (#7424)
Signed-off-by: Wallas Santos <wallashss@ibm.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Nick Hill <nickhill@us.ibm.com> Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
|
||||
"127.0.0.1", "--tokenizer-pool-size",
|
||||
str(tokenizer_pool_size)
|
||||
]
|
||||
|
||||
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
|
||||
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
|
||||
env_vars = os.environ.copy()
|
||||
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
|
||||
|
||||
if engine_use_ray:
|
||||
commands.append("--engine-use-ray")
|
||||
if worker_use_ray:
|
||||
commands.append("--worker-use-ray")
|
||||
uvicorn_process = subprocess.Popen(commands)
|
||||
uvicorn_process = subprocess.Popen(commands, env=env_vars)
|
||||
yield
|
||||
uvicorn_process.terminate()
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
@@ -106,11 +107,16 @@ async def test_new_requests_event():
|
||||
assert engine.engine.add_request_calls == 3
|
||||
assert engine.engine.step_calls == old_step_calls + 1
|
||||
|
||||
# Allow the deprecated engine_use_ray option so that it does not raise an exception
|
||||
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
|
||||
|
||||
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
|
||||
assert engine.get_model_config() is not None
|
||||
assert engine.get_tokenizer() is not None
|
||||
assert engine.get_decoding_config() is not None
|
||||
|
||||
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
|
||||
|
||||
|
||||
def test_asyncio_run():
|
||||
wait_for_gpu_memory_to_clear(
|
||||
|
||||
@@ -23,7 +23,11 @@ def server():
|
||||
str(chatml_jinja_path),
|
||||
]
|
||||
|
||||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
|
||||
# Allow `--engine-use-ray`, otherwise the launch of the server throws
|
||||
# an error due to trying to use a deprecated feature
|
||||
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
|
||||
with RemoteOpenAIServer(MODEL_NAME, args,
|
||||
env_dict=env_dict) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user