70 lines
1.7 KiB
Python
70 lines
1.7 KiB
Python
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from typing import List, Mapping, Optional, Union
|
|
|
|
from vllm import PoolingParams
|
|
from vllm.inputs import PromptInputs
|
|
from vllm.lora.request import LoRARequest
|
|
from vllm.outputs import RequestOutput
|
|
from vllm.prompt_adapter.request import PromptAdapterRequest
|
|
from vllm.sampling_params import SamplingParams
|
|
|
|
# Marker string for a successful RPC.
VLLM_RPC_SUCCESS_STR = "SUCCESS"

# Suffix strings, one per socket kind (input / output / health / data).
# NOTE(review): presumably appended to a shared base IPC path -- confirm
# against the code that opens the sockets.
IPC_INPUT_EXT = "_input_socket"
IPC_OUTPUT_EXT = "_output_socket"
IPC_HEALTH_EXT = "_health_socket"
IPC_DATA_EXT = "_data_socket"
|
|
|
|
|
|
class MQEngineDeadError(RuntimeError):
    """RuntimeError subclass signalling that the engine loop is not running.

    Instances are built by ``ENGINE_DEAD_ERROR`` in this module; the class
    adds no attributes or behaviour of its own.
    """
|
|
|
|
|
|
@dataclass
class RPCProcessRequest:
    """Request record carrying prompt inputs, parameters and a request id.

    The three leading fields are required; the remaining ones default to
    ``None``. This class is one member of the ``RPC_REQUEST_T`` union.
    """

    # Required fields (positional order is part of the interface).
    inputs: PromptInputs
    params: Union[SamplingParams, PoolingParams]
    request_id: str

    # Optional extras, all absent by default.
    lora_request: Optional[LoRARequest] = None
    trace_headers: Optional[Mapping[str, str]] = None
    prompt_adapter_request: Optional[PromptAdapterRequest] = None
|
|
|
|
|
|
@dataclass
class RPCError:
    """Error record pairing an exception with request and engine context.

    It is the non-list alternative in the ``REQUEST_OUTPUTS_T`` union.
    """

    # May be None; NOTE(review): presumably when the error is not tied to
    # a single request -- confirm with the producers of this record.
    request_id: Optional[str]
    # Flag describing whether the engine itself is in an errored state.
    is_engine_errored: bool
    # The exception object being reported.
    exception: BaseException
|
|
|
|
|
|
@dataclass
class RPCAbortRequest:
    """Abort request record identifying its target by ``request_id``.

    This class is one member of the ``RPC_REQUEST_T`` union.
    """

    # Identifier of the request this abort refers to.
    request_id: str
|
|
|
|
|
|
class RPCStartupRequest(Enum):
    """Enumeration of startup requests; currently a single member.

    This enum is one member of the ``RPC_REQUEST_T`` union.
    """

    IS_SERVER_READY = 1
|
|
|
|
|
|
@dataclass
class RPCStartupResponse:
    """Startup response record carrying a single boolean flag."""

    # Whether tracing is enabled, as reported by the sender.
    tracing_enabled: bool
|
|
|
|
|
|
# Any of the request objects defined in this module.
RPC_REQUEST_T = Union[
    RPCProcessRequest,
    RPCAbortRequest,
    RPCStartupRequest,
]

# Either a list of request outputs or a single RPCError record.
REQUEST_OUTPUTS_T = Union[
    List[RequestOutput],
    RPCError,
]
|
|
|
|
|
|
def ENGINE_DEAD_ERROR(
        error: Optional[BaseException] = None) -> MQEngineDeadError:
    """Build (but do not raise) an ``MQEngineDeadError``.

    Args:
        error: Optional original exception. When given, its ``repr`` is
            appended to the message, followed by a period.

    Returns:
        A new ``MQEngineDeadError`` whose message says the engine loop is
        not running.
    """
    message = ("Engine loop is not running. Inspect the stacktrace to "
               "find the original error")
    if error is not None:
        # Only this variant ends with ": <repr>." -- the bare message
        # carries no trailing period.
        message = f"{message}: {error!r}."
    return MQEngineDeadError(message)
|