[P/D] NIXL Integration (#17751)
Signed-off-by: ApostaC <yihua98@uchicago.edu> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com> Signed-off-by: Robert Shaw <rshaw@neuralmagic.com> Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Nick Hill <nhill@redhat.com> Signed-off-by: Brent Salisbury <bsalisbu@redhat.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: ApostaC <yihua98@uchicago.edu> Co-authored-by: Robert Shaw <rshaw@neuralmagic.com> Co-authored-by: mgoin <mgoin64@gmail.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Tyler Michael Smith <tysmith@redhat.com> Co-authored-by: Brent Salisbury <bsalisbu@redhat.com>
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import enum
|
||||
from typing import TYPE_CHECKING, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1 import KVTransferParams
|
||||
from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.utils import is_list_of
|
||||
@@ -61,6 +62,15 @@ class Request:
|
||||
self.num_encoder_inputs = len(self.mm_inputs)
|
||||
self.has_encoder_inputs = self.num_encoder_inputs > 0
|
||||
|
||||
# P/D: KV transfer parameters (raw and parsed).
|
||||
raw_params = (None if sampling_params.extra_args is None
|
||||
else sampling_params.extra_args.get(
|
||||
"kv_transfer_params", None))
|
||||
self.raw_kv_transfer_params: Optional[dict[str, Any]] = raw_params
|
||||
# Each connector parses the raw dictionary and sets this
|
||||
# attr the first time that the request is processed.
|
||||
self.kv_transfer_params: Optional[KVTransferParams] = None
|
||||
|
||||
# Sanity check
|
||||
assert len(self.mm_inputs) == len(self.mm_positions)
|
||||
if self.mm_hashes:
|
||||
@@ -150,6 +160,7 @@ class RequestStatus(enum.IntEnum):
|
||||
"""Status of a request."""
|
||||
WAITING = enum.auto()
|
||||
WAITING_FOR_FSM = enum.auto()
|
||||
WAITING_FOR_REMOTE_KVS = enum.auto()
|
||||
RUNNING = enum.auto()
|
||||
PREEMPTED = enum.auto()
|
||||
# Note: anything after PREEMPTED will be considered
|
||||
|
||||
Reference in New Issue
Block a user