Support prompt_embeds for pooling requests in output processor (#34904)

Signed-off-by: Li Zhang <lzhanga@amazon.com>
Co-authored-by: Li Zhang <lzhanga@amazon.com>
This commit is contained in:
Li
2026-02-20 22:57:38 -05:00
committed by GitHub
parent d38cd3dde5
commit 59c6233297

View File

@@ -337,16 +337,20 @@ class RequestState:
 finished: bool,
 kv_transfer_params: dict[str, Any] | None = None,
 ) -> RequestOutput | PoolingRequestOutput:
+# If prompt embeds were used, put placeholder prompt token ids
+prompt_token_ids = self.prompt_token_ids
+if prompt_token_ids is None and self.prompt_embeds is not None:
+prompt_token_ids = [0] * len(self.prompt_embeds)
+assert prompt_token_ids is not None
 first_output = outputs[0]
 if isinstance(first_output, PoolingOutput):
 assert len(outputs) == 1
-# Prompt embeddings are currently not supported by pooling requests.
-assert self.prompt_token_ids is not None
 return PoolingRequestOutput(
 request_id=external_req_id,
 outputs=first_output,
 num_cached_tokens=self.num_cached_tokens,
-prompt_token_ids=self.prompt_token_ids,
+prompt_token_ids=prompt_token_ids,
 finished=finished,
 )
 assert self.logprobs_processor is not None
@@ -356,11 +360,6 @@ class RequestState:
 else:
 prompt_logprobs = self.logprobs_processor.prompt_logprobs
-# If prompt embeds were used, put placeholder prompt token ids
-prompt_token_ids = self.prompt_token_ids
-if prompt_token_ids is None and self.prompt_embeds is not None:
-prompt_token_ids = [0] * len(self.prompt_embeds)
 return RequestOutput(
 request_id=external_req_id, # request_id is what was provided externally
 lora_request=self.lora_request,