[Frontend] Move APIServerProcessManager target server fn (#38115)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -21,13 +21,14 @@ from typing import (
|
||||
)
|
||||
|
||||
import torch
|
||||
import uvloop
|
||||
from torch.autograd.profiler import record_function
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext, is_usage_stats_enabled, usage_message
|
||||
from vllm.utils.network_utils import get_open_port, get_open_zmq_ipc_path, get_tcp_uri
|
||||
from vllm.utils.system_utils import kill_process_tree
|
||||
from vllm.utils.system_utils import decorate_logs, kill_process_tree, set_process_title
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -166,20 +167,20 @@ class APIServerProcessManager:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target_server_fn: Callable,
|
||||
listen_address: str,
|
||||
sock: Any,
|
||||
args: argparse.Namespace,
|
||||
num_servers: int,
|
||||
input_addresses: list[str],
|
||||
output_addresses: list[str],
|
||||
target_server_fn: Callable | None = None,
|
||||
stats_update_address: str | None = None,
|
||||
tensor_queue: Queue | None = None,
|
||||
):
|
||||
"""Initialize and start API server worker processes.
|
||||
|
||||
Args:
|
||||
target_server_fn: Function to call for each API server process
|
||||
target_server_fn: Override function to call for each API server process
|
||||
listen_address: Address to listen for client connections
|
||||
sock: Socket for client connections
|
||||
args: Command line arguments
|
||||
@@ -212,7 +213,7 @@ class APIServerProcessManager:
|
||||
client_config["tensor_queue"] = tensor_queue
|
||||
|
||||
proc = spawn_context.Process(
|
||||
target=target_server_fn,
|
||||
target=target_server_fn or run_api_server_worker_proc,
|
||||
name=f"ApiServer_{i}",
|
||||
args=(listen_address, sock, args, client_config),
|
||||
)
|
||||
@@ -231,6 +232,25 @@ class APIServerProcessManager:
|
||||
shutdown(self.processes, timeout=timeout)
|
||||
|
||||
|
||||
def run_api_server_worker_proc(
|
||||
listen_address, sock, args, client_config=None, **uvicorn_kwargs
|
||||
) -> None:
|
||||
"""Entrypoint for individual API server worker processes."""
|
||||
|
||||
from vllm.entrypoints.openai.api_server import run_server_worker
|
||||
|
||||
client_config = client_config or {}
|
||||
server_index = client_config.get("client_index", 0)
|
||||
|
||||
# Set process title and add process-specific prefix to stdout and stderr.
|
||||
set_process_title("APIServer", str(server_index))
|
||||
decorate_logs()
|
||||
|
||||
uvloop.run(
|
||||
run_server_worker(listen_address, sock, args, client_config, **uvicorn_kwargs)
|
||||
)
|
||||
|
||||
|
||||
def wait_for_completion_or_failure(
|
||||
api_server_manager: APIServerProcessManager,
|
||||
engine_manager: Union["CoreEngineProcManager", "CoreEngineActorManager"]
|
||||
|
||||
Reference in New Issue
Block a user