[P/D] NIXL Integration (#17751)
Signed-off-by: ApostaC <yihua98@uchicago.edu>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Brent Salisbury <bsalisbu@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: ApostaC <yihua98@uchicago.edu>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com>
Co-authored-by: Brent Salisbury <bsalisbu@redhat.com>
This commit is contained in:
@@ -403,6 +403,9 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"access by 3rd parties, and long enough to be "
|
||||
"unpredictable (e.g., 43 characters base64-encoded, corresponding "
|
||||
"to 256 bit). Not supported by vLLM engine V0."))
|
||||
kv_transfer_params: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description="KVTransfer parameters used for disaggregated serving.")
|
||||
|
||||
# doc: end-chat-completion-extra-params
|
||||
|
||||
@@ -540,7 +543,9 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
output_kind=RequestOutputKind.DELTA if self.stream \
|
||||
else RequestOutputKind.FINAL_ONLY,
|
||||
guided_decoding=guided_decoding,
|
||||
logit_bias=self.logit_bias)
|
||||
logit_bias=self.logit_bias,
|
||||
extra_args=({"kv_transfer_params": self.kv_transfer_params}
|
||||
if self.kv_transfer_params else None))
|
||||
|
||||
def _get_guided_json_from_tool(
|
||||
self) -> Optional[Union[str, dict, BaseModel]]:
|
||||
@@ -848,6 +853,10 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
" as strings of the form 'token_id:{token_id}' so that tokens "
|
||||
"that are not JSON-encodable can be identified."))
|
||||
|
||||
kv_transfer_params: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description="KVTransfer parameters used for disaggregated serving.")
|
||||
|
||||
# doc: end-completion-extra-params
|
||||
|
||||
# Default sampling parameters for completion requests
|
||||
@@ -973,7 +982,9 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
else RequestOutputKind.FINAL_ONLY,
|
||||
guided_decoding=guided_decoding,
|
||||
logit_bias=self.logit_bias,
|
||||
allowed_token_ids=self.allowed_token_ids)
|
||||
allowed_token_ids=self.allowed_token_ids,
|
||||
extra_args=({"kv_transfer_params": self.kv_transfer_params}
|
||||
if self.kv_transfer_params else None))
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
@@ -1223,6 +1234,8 @@ class CompletionResponse(OpenAIBaseModel):
|
||||
model: str
|
||||
choices: list[CompletionResponseChoice]
|
||||
usage: UsageInfo
|
||||
kv_transfer_params: Optional[dict[str, Any]] = Field(
|
||||
default=None, description="KVTransfer parameters.")
|
||||
|
||||
|
||||
class CompletionResponseStreamChoice(OpenAIBaseModel):
|
||||
@@ -1412,6 +1425,8 @@ class ChatCompletionResponse(OpenAIBaseModel):
|
||||
choices: list[ChatCompletionResponseChoice]
|
||||
usage: UsageInfo
|
||||
prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
|
||||
kv_transfer_params: Optional[dict[str, Any]] = Field(
|
||||
default=None, description="KVTransfer parameters.")
|
||||
|
||||
|
||||
class DeltaMessage(OpenAIBaseModel):
|
||||
|
||||
Reference in New Issue
Block a user