Feature/vllm/input embedding completion api (#17590)

Signed-off-by: Andrew Sansom <andrew@protopia.ai>
Signed-off-by: Nan2018 <nan@protopia.ai>
Co-authored-by: 临景 <linjing.yx@alibaba-inc.com>
Co-authored-by: Bryce1010 <bryceyx@gmail.com>
Co-authored-by: Andrew Sansom <andrew@protopia.ai>
Co-authored-by: Andrew Sansom <qthequartermasterman@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Nan Qin
2025-05-18 22:18:05 -05:00
committed by GitHub
parent 9da1095daf
commit 221cfc2fea
10 changed files with 637 additions and 40 deletions

View File

@@ -2,6 +2,8 @@
 from typing import Optional, Union
+import torch
+
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.pooling_params import PoolingParams
@@ -23,6 +25,7 @@ class RequestLogger:
         request_id: str,
         prompt: Optional[str],
         prompt_token_ids: Optional[list[int]],
+        prompt_embeds: Optional[torch.Tensor],
         params: Optional[Union[SamplingParams, PoolingParams,
                                BeamSearchParams]],
         lora_request: Optional[LoRARequest],
@@ -39,6 +42,8 @@ class RequestLogger:
         logger.info(
             "Received request %s: prompt: %r, "
             "params: %s, prompt_token_ids: %s, "
+            "prompt_embeds shape: %s, "
             "lora_request: %s, prompt_adapter_request: %s.", request_id,
-            prompt, params, prompt_token_ids, lora_request,
-            prompt_adapter_request)
+            prompt, params, prompt_token_ids,
+            prompt_embeds.shape if prompt_embeds is not None else None,
+            lora_request, prompt_adapter_request)